Merge pull request #5922 from crosbymichael/host-dev-priv

Mount /dev in tmpfs for privileged containers
This commit is contained in:
Victor Vieux 2014-05-21 18:56:24 -07:00
commit a0841ff1eb
6 changed files with 131 additions and 40 deletions

View file

@ -4,26 +4,69 @@ import (
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
// Context is a generic key value pair that allows
// arbitrary data to be sent
// Context is a generic key value pair that allows arbitrary data to be sent
type Context map[string]string
// Container defines configuration options for how a
// container is setup inside a directory and how a process should be executed
// Container defines configuration options for executing a process inside a contained environment
type Container struct {
Hostname string `json:"hostname,omitempty"` // hostname
ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly
NoPivotRoot bool `json:"no_pivot_root,omitempty"` // this can be enabled if you are running in ramdisk
User string `json:"user,omitempty"` // user to execute the process as
WorkingDir string `json:"working_dir,omitempty"` // current working directory
Env []string `json:"environment,omitempty"` // environment to set
Tty bool `json:"tty,omitempty"` // setup a proper tty or not
Namespaces map[string]bool `json:"namespaces,omitempty"` // namespaces to apply
Capabilities []string `json:"capabilities,omitempty"` // capabilities given to the container
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname,omitempty"`
// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writable
ReadonlyFs bool `json:"readonly_fs,omitempty"`
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root,omitempty"`
// User will set the uid and gid of the executing process running inside the container
User string `json:"user,omitempty"`
// WorkingDir will change the process's current working directory inside the container's rootfs
WorkingDir string `json:"working_dir,omitempty"`
// Env will populate the process's environment with the provided values
// Any values from the parent process will be cleared before the values
// provided in Env are provided to the process
Env []string `json:"environment,omitempty"`
// Tty when true will allocate a pty slave on the host for access by the container's process
// and ensure that it is mounted inside the container's rootfs
Tty bool `json:"tty,omitempty"`
// Namespaces specifies the container's namespaces that it should set up when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces map[string]bool `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the process's capability mask
Capabilities []string `json:"capabilities,omitempty"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks,omitempty"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"`
// Context is a generic key value format that allows for additional settings to be passed
// on the container's creation
// This is commonly used to specify apparmor profiles, selinux labels, and different restrictions
// placed on the container's processes
Context Context `json:"context,omitempty"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts Mounts `json:"mounts,omitempty"`
// RequiredDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
// If the host system does not support the device that the container requests, an error is returned
RequiredDeviceNodes []string `json:"required_device_nodes,omitempty"`
// OptionalDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
// If the host system does not support the device that the container requests, the error is ignored
OptionalDeviceNodes []string `json:"optional_device_nodes,omitempty"`
}
// Network defines configuration for a container's networking stack
@ -31,9 +74,20 @@ type Container struct {
// The network configuration can be omitted from a container causing the
// container to be set up with the host's networking stack
type Network struct {
Type string `json:"type,omitempty"` // type of networking to setup i.e. veth, macvlan, etc
Context Context `json:"context,omitempty"` // generic context for type specific networking options
// Type sets the network's type, commonly veth and loopback
Type string `json:"type,omitempty"`
// Context is a generic key value format for setting additional options that are specific to
// the network type
Context Context `json:"context,omitempty"`
// Address contains the IP and mask to set on the network interface
Address string `json:"address,omitempty"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway,omitempty"`
// Mtu sets the MTU value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
Mtu int `json:"mtu,omitempty"`
}

View file

@ -43,5 +43,13 @@
{
"type": "devtmpfs"
}
],
"required_device_nodes": [
"null",
"zero",
"full",
"random",
"urandom",
"tty"
]
}

View file

@ -4,12 +4,14 @@ import (
"encoding/json"
"os"
"testing"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
)
// Checks whether the expected value is present in the given list of values.
func hasCapability(expected string, capabilities []string) bool {
for _, capability := range capabilities {
if capability == expected {
func contains(expected string, values []string) bool {
for _, v := range values {
if v == expected {
return true
}
}
@ -47,18 +49,25 @@ func TestContainerJsonFormat(t *testing.T) {
t.Fail()
}
if hasCapability("SYS_ADMIN", container.Capabilities) {
if contains("SYS_ADMIN", container.Capabilities) {
t.Log("SYS_ADMIN should not be enabled in capabilities mask")
t.Fail()
}
if !hasCapability("MKNOD", container.Capabilities) {
if !contains("MKNOD", container.Capabilities) {
t.Log("MKNOD should be enabled in capabilities mask")
t.Fail()
}
if hasCapability("SYS_CHROOT", container.Capabilities) {
if contains("SYS_CHROOT", container.Capabilities) {
t.Log("capabilities mask should not contain SYS_CHROOT")
t.Fail()
}
for _, n := range nodes.DefaultNodes {
if !contains(n, container.RequiredDeviceNodes) {
t.Logf("devices should contain %s", n)
t.Fail()
}
}
}

View file

@ -48,11 +48,11 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
if err := setupBindmounts(rootfs, container.Mounts); err != nil {
return fmt.Errorf("bind mounts %s", err)
}
if err := nodes.CopyN(rootfs, nodes.DefaultNodes, true); err != nil {
return fmt.Errorf("copy dev nodes %s", err)
if err := nodes.CopyN(rootfs, container.RequiredDeviceNodes, true); err != nil {
return fmt.Errorf("copy required dev nodes %s", err)
}
if err := nodes.CopyN(rootfs, nodes.AdditionalNodes, false); err != nil {
return fmt.Errorf("copy additional dev nodes %s", err)
if err := nodes.CopyN(rootfs, container.OptionalDeviceNodes, false); err != nil {
return fmt.Errorf("copy optional dev nodes %s", err)
}
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
return err
@ -195,12 +195,10 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo
systemMounts := []mount{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
}
if len(mounts.OfType("devtmpfs")) == 1 {
systemMounts = append([]mount{{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}}, systemMounts...)
}
return systemMounts
}

View file

@ -4,6 +4,7 @@ package nodes
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
@ -21,11 +22,6 @@ var DefaultNodes = []string{
"tty",
}
// AdditionalNodes includes nodes that are not required
var AdditionalNodes = []string{
"fuse",
}
// CopyN copies the device node from the host into the rootfs
func CopyN(rootfs string, nodesToCopy []string, shouldExist bool) error {
oldMask := system.Umask(0000)
@ -61,3 +57,18 @@ func Copy(rootfs, node string, shouldExist bool) error {
}
return nil
}
// GetHostDeviceNodes lists the names of the entries directly under the host's
// /dev directory whose file mode has the os.ModeDevice bit set.
//
// If /dev cannot be read, the error from ioutil.ReadDir is returned together
// with a nil slice. On success the returned slice is never nil, even when no
// entry qualifies.
func GetHostDeviceNodes() ([]string, error) {
	entries, err := ioutil.ReadDir("/dev")
	if err != nil {
		return nil, err
	}

	names := []string{}
	for i := range entries {
		entry := entries[i]
		// os.ModeDevice is a single bit, so a zero result means "not a device".
		if entry.Mode()&os.ModeDevice == 0 {
			continue
		}
		names = append(names, entry.Name())
	}
	return names, nil
}

View file

@ -0,0 +1,11 @@
// +build !linux
package nodes
import "github.com/dotcloud/docker/pkg/libcontainer"
var DefaultNodes = []string{}
// GetHostDeviceNodes is the variant compiled on non-Linux platforms (see the
// !linux build constraint on this file). It performs no lookup and always
// returns a nil slice together with libcontainer.ErrUnsupported.
func GetHostDeviceNodes() ([]string, error) {
return nil, libcontainer.ErrUnsupported
}