diff --git a/cgroups/apply_raw.go b/cgroups/apply_raw.go index 47a2a00..5fe3179 100644 --- a/cgroups/apply_raw.go +++ b/cgroups/apply_raw.go @@ -49,6 +49,9 @@ func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) { if err := raw.setupCpu(c, pid); err != nil { return nil, err } + if err := raw.setupCpuset(c, pid); err != nil { + return nil, err + } return raw, nil } @@ -170,6 +173,25 @@ func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) { return nil } +func (raw *rawCgroup) setupCpuset(c *Cgroup, pid int) (err error) { + if c.CpusetCpus != "" { + dir, err := raw.join("cpuset", pid) + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "cpuset.cpus", c.CpusetCpus); err != nil { + return err + } + } + return nil +} + func (raw *rawCgroup) Cleanup() error { get := func(subsystem string) string { path, _ := raw.path(subsystem) @@ -180,6 +202,7 @@ func (raw *rawCgroup) Cleanup() error { get("memory"), get("devices"), get("cpu"), + get("cpuset"), } { if path != "" { os.RemoveAll(path) diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 8554ba9..5fe1034 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -15,10 +15,11 @@ type Cgroup struct { Name string `json:"name,omitempty"` Parent string `json:"parent,omitempty"` - DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) + DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice + Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) + MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap + CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) + CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use UnitProperties [][2]string `json:"unit_properties,omitempty"` // systemd unit properties } diff --git a/libcontainer/capabilities/capabilities.go b/libcontainer/capabilities/capabilities.go index fbf7353..4b81e70 100644 --- a/libcontainer/capabilities/capabilities.go +++ b/libcontainer/capabilities/capabilities.go @@ -27,7 +27,9 @@ func DropCapabilities(container *libcontainer.Container) error { func getCapabilitiesMask(container *libcontainer.Container) []capability.Cap { drop := []capability.Cap{} for _, c := range container.CapabilitiesMask { - drop = append(drop, c.Value) + if !c.Enabled { + drop = append(drop, c.Value) + } } return drop } diff --git a/libcontainer/network/netns.go b/libcontainer/network/netns.go new file mode 100644 index 0000000..7e311f2 --- /dev/null +++ b/libcontainer/network/netns.go @@ -0,0 +1,34 @@ +package network + +import ( + "fmt" + "os" + "syscall" + + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" +) + +// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace +type NetNS struct { +} + +func (v *NetNS) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { + context["nspath"] = n.Context["nspath"] + return nil +} + +func (v *NetNS) Initialize(config *libcontainer.Network, context libcontainer.Context) error { + nspath, exists := context["nspath"] + if !exists { + return fmt.Errorf("nspath does not exist in network context") + } + f, err := os.OpenFile(nspath, os.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("failed get network namespace fd: %v", err) + } + if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil { + return fmt.Errorf("failed to setns current network namespace: %v", err) + } + return nil +} diff --git a/libcontainer/network/strategy.go b/libcontainer/network/strategy.go index 693790d..e41ecc3 100644 --- a/libcontainer/network/strategy.go +++ b/libcontainer/network/strategy.go @@ -2,6 +2,7 @@ package network import ( "errors" + "github.com/dotcloud/docker/pkg/libcontainer" ) @@ -12,6 +13,7 @@ var ( var strategies = map[string]NetworkStrategy{ "veth": &Veth{}, "loopback": &Loopback{}, + "netns": &NetNS{}, } // NetworkStrategy represents a specific network configuration for diff --git a/libcontainer/nsinit/command.go b/libcontainer/nsinit/command.go index 5546065..153a48a 100644 --- a/libcontainer/nsinit/command.go +++ b/libcontainer/nsinit/command.go @@ -39,7 +39,9 @@ func (c *DefaultCommandFactory) Create(container *libcontainer.Container, consol // flags on clone, unshare, and setns func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { - flag |= ns.Value + if ns.Enabled { + flag |= ns.Value + } } return flag } diff --git a/libcontainer/nsinit/exec.go b/libcontainer/nsinit/exec.go index a44faaf..73842f7 100644 --- a/libcontainer/nsinit/exec.go +++ b/libcontainer/nsinit/exec.go @@ -3,13 +3,14 @@ package nsinit import ( + "os" + "os/exec" + "syscall" + "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "os" - "os/exec" - "syscall" ) // Exec performes setup outside of a namespace so that a container can be diff --git a/libcontainer/nsinit/init.go b/libcontainer/nsinit/init.go index 5aa5f9f..8518232 100644 --- a/libcontainer/nsinit/init.go +++ b/libcontainer/nsinit/init.go @@ -4,6 +4,10 @@ package nsinit import ( "fmt" + "os" + "runtime" + "syscall" + "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/apparmor" @@ -12,9 +16,6 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/user" - "os" - "runtime" - "syscall" ) // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, @@ -58,13 +59,13 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil { return fmt.Errorf("parent death signal %s", err) } + if err := setupNetwork(container, context); err != nil { + return fmt.Errorf("setup networking %s", err) + } ns.logger.Println("setup mount namespace") if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupNetwork(container, context); err != nil { - return fmt.Errorf("setup networking %s", err) - } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } diff --git a/libcontainer/nsinit/mount.go b/libcontainer/nsinit/mount.go index 796143c..a10f2b5 100644 --- a/libcontainer/nsinit/mount.go +++ b/libcontainer/nsinit/mount.go @@ -32,11 +32,6 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } - if readonly { - if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { - return fmt.Errorf("mounting %s as readonly %s", rootfs, err) - } - } if err := mountSystem(rootfs, mountLabel); err != nil { return fmt.Errorf("mount system %s", err) } @@ -82,6 +77,12 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons } } + if readonly { + if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting %s as readonly %s", rootfs, err) + } + } + system.Umask(0022) return nil diff --git a/libcontainer/nsinit/nsinit/main.go b/libcontainer/nsinit/nsinit/main.go index df32d0b..37aa784 100644 --- a/libcontainer/nsinit/nsinit/main.go +++ b/libcontainer/nsinit/nsinit/main.go @@ -3,14 +3,15 @@ package main import ( "encoding/json" "flag" - "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io" "io/ioutil" "log" "os" "path/filepath" "strconv" + + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" ) var ( diff --git a/libcontainer/types.go b/libcontainer/types.go index 94fe876..ffeb55a 100644 --- a/libcontainer/types.go +++ b/libcontainer/types.go @@ -1,7 +1,6 @@ package libcontainer import ( - "encoding/json" "errors" "github.com/syndtr/gocapability/capability" ) @@ -19,29 +18,30 @@ var ( namespaceList = Namespaces{} capabilityList = Capabilities{ - {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, - {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, - {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, - {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, - {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, - {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, - {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, - {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, - {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, - {Key: "MKNOD", Value: capability.CAP_MKNOD}, - {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, - {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL}, - {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, - {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, - {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, + {Key: "SETPCAP", Value: capability.CAP_SETPCAP, Enabled: false}, + {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE, Enabled: false}, + {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO, Enabled: false}, + {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT, Enabled: false}, + {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN, Enabled: false}, + {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE, Enabled: false}, + {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE, Enabled: false}, + {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME, Enabled: false}, + {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG, Enabled: false}, + {Key: "MKNOD", Value: capability.CAP_MKNOD, Enabled: false}, + {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE, Enabled: false}, + {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL, Enabled: false}, + {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE, Enabled: false}, + {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN, Enabled: false}, + {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN, Enabled: false}, } ) type ( Namespace struct { - Key string - Value int - File string + Key string `json:"key,omitempty"` + Enabled bool `json:"enabled,omitempty"` + Value int `json:"value,omitempty"` + File string `json:"file,omitempty"` } Namespaces []*Namespace ) @@ -50,27 +50,11 @@ func (ns *Namespace) String() string { return ns.Key } -func (ns *Namespace) MarshalJSON() ([]byte, error) { - return json.Marshal(ns.Key) -} - -func (ns *Namespace) UnmarshalJSON(src []byte) error { - var nsName string - if err := json.Unmarshal(src, &nsName); err != nil { - return err - } - ret := GetNamespace(nsName) - if ret == nil { - return ErrUnkownNamespace - } - *ns = *ret - return nil -} - func GetNamespace(key string) *Namespace { for _, ns := range namespaceList { if ns.Key == key { - return ns + cpy := *ns + return &cpy } } return nil @@ -79,18 +63,23 @@ func GetNamespace(key string) *Namespace { // Contains returns true if the specified Namespace is // in the slice func (n Namespaces) Contains(ns string) bool { + return n.Get(ns) != nil +} + +func (n Namespaces) Get(ns string) *Namespace { for _, nsp := range n { if nsp.Key == ns { - return true + return nsp } } - return false + return nil } type ( Capability struct { - Key string - Value capability.Cap + Key string `json:"key,omitempty"` + Enabled bool `json:"enabled"` + Value capability.Cap `json:"value,omitempty"` } Capabilities []*Capability ) @@ -99,27 +88,11 @@ func (c *Capability) String() string { return c.Key } -func (c *Capability) MarshalJSON() ([]byte, error) { - return json.Marshal(c.Key) -} - -func (c *Capability) UnmarshalJSON(src []byte) error { - var capName string - if err := json.Unmarshal(src, &capName); err != nil { - return err - } - ret := GetCapability(capName) - if ret == nil { - return ErrUnkownCapability - } - *c = *ret - return nil -} - func GetCapability(key string) *Capability { for _, capp := range capabilityList { if capp.Key == key { - return capp + cpy := *capp + return &cpy } } return nil @@ -128,10 +101,14 @@ func GetCapability(key string) *Capability { // Contains returns true if the specified Capability is // in the slice func (c Capabilities) Contains(capp string) bool { + return c.Get(capp) != nil +} + +func (c Capabilities) Get(capp string) *Capability { for _, cap := range c { if cap.Key == capp { - return true + return cap } } - return false + return nil } diff --git a/libcontainer/types_linux.go b/libcontainer/types_linux.go index c14531d..1f937e0 100644 --- a/libcontainer/types_linux.go +++ b/libcontainer/types_linux.go @@ -6,11 +6,11 @@ import ( func init() { namespaceList = Namespaces{ - {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, - {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, - {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, - {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, - {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, - {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, + {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt", Enabled: true}, + {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts", Enabled: true}, + {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc", Enabled: true}, + {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user", Enabled: true}, + {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid", Enabled: true}, + {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net", Enabled: true}, } }