diff --git a/cgroups/apply_raw.go b/cgroups/apply_raw.go new file mode 100644 index 0000000..bce96f4 --- /dev/null +++ b/cgroups/apply_raw.go @@ -0,0 +1,189 @@ +package cgroups + +import ( + "fmt" + "os" + "path/filepath" + "strconv" +) + +type rawCgroup struct { + root string + cgroup string +} + +func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) { + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + + cgroupRoot, err := FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + raw := &rawCgroup{ + root: cgroupRoot, + cgroup: cgroup, + } + + if err := raw.setupDevices(c, pid); err != nil { + return nil, err + } + if err := raw.setupMemory(c, pid); err != nil { + return nil, err + } + if err := raw.setupCpu(c, pid); err != nil { + return nil, err + } + return raw, nil +} + +func (raw *rawCgroup) path(subsystem string) (string, error) { + initPath, err := GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil +} + +func (raw *rawCgroup) join(subsystem string, pid int) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil { + return "", err + } + return path, nil +} + +func (raw *rawCgroup) setupDevices(c *Cgroup, pid int) (err error) { + if !c.DeviceAccess { + dir, err := raw.join("devices", pid) + if err != nil { + return err + } + + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 rwm", + } + + for _, val := range allow { + if err := writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + return nil +} + +func (raw *rawCgroup) setupMemory(c *Cgroup, pid int) (err error) { + if c.Memory != 0 || c.MemorySwap != 0 { + dir, err := raw.join("memory", pid) + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if c.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { + return err + } + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { + return err + } + } + // By default, MemorySwap is set to twice the size of RAM. + // If you want to omit MemorySwap, set it to `-1'. + if c.MemorySwap != -1 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil { + return err + } + } + } + return nil +} + +func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := raw.join("cpu", pid) + if err != nil { + return err + } + if c.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil { + return err + } + } + return nil +} + +func (raw *rawCgroup) Cleanup() error { + get := func(subsystem string) string { + path, _ := raw.path(subsystem) + return path + } + + for _, path := range []string{ + get("memory"), + get("devices"), + get("cpu"), + } { + if path != "" { + os.RemoveAll(path) + } + } + return nil +} diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index b40e1a3..f35556f 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -8,7 +8,6 @@ import ( "io/ioutil" "os" "path/filepath" - "strconv" "strings" ) @@ -22,6 +21,10 @@ type Cgroup struct { CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) } +type ActiveCgroup interface { + Cleanup() error +} + // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { mounts, err := mount.GetMounts() @@ -62,48 +65,6 @@ func GetInitCgroupDir(subsystem string) (string, error) { return parseCgroupFile(subsystem, f) } -func (c *Cgroup) Path(root, subsystem string) (string, error) { - cgroup := c.Name - if c.Parent != "" { - cgroup = filepath.Join(c.Parent, cgroup) - } - initPath, err := GetInitCgroupDir(subsystem) - if err != nil { - return "", err - } - return filepath.Join(root, subsystem, initPath, cgroup), nil -} - -func (c *Cgroup) Join(root, subsystem string, pid int) (string, error) { - path, err := c.Path(root, subsystem) - if err != nil { - return "", err - } - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return "", err - } - if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil { - return "", err - } - return path, nil -} - -func (c *Cgroup) Cleanup(root string) error { - get := func(subsystem string) string { - path, _ := c.Path(root, subsystem) - return path - } - - for _, path := range []string{ - get("memory"), - get("devices"), - get("cpu"), - } { - os.RemoveAll(path) - } - return nil -} - func parseCgroupFile(subsystem string, r io.Reader) (string, error) { s := bufio.NewScanner(r) for s.Scan() { @@ -125,126 +86,6 @@ func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } -func (c *Cgroup) Apply(pid int) error { - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - // - // we can pick any subsystem to find the root - cgroupRoot, err := FindCgroupMountpoint("cpu") - if err != nil { - return err - } - cgroupRoot = filepath.Dir(cgroupRoot) - - if _, err := os.Stat(cgroupRoot); err != nil { - return fmt.Errorf("cgroups fs not found") - } - if err := c.setupDevices(cgroupRoot, pid); err != nil { - return err - } - if err := c.setupMemory(cgroupRoot, pid); err != nil { - return err - } - if err := c.setupCpu(cgroupRoot, pid); err != nil { - return err - } - return nil -} - -func (c *Cgroup) setupDevices(cgroupRoot string, pid int) (err error) { - if !c.DeviceAccess { - dir, err := c.Join(cgroupRoot, "devices", pid) - if err != nil { - return err - } - - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return err - } - - allow := []string{ - // /dev/null, zero, full - "c 1:3 rwm", - "c 1:5 rwm", - "c 1:7 rwm", - - // consoles - "c 5:1 rwm", - "c 5:0 rwm", - "c 4:0 rwm", - "c 4:1 rwm", - - // /dev/urandom,/dev/random - "c 1:9 rwm", - "c 1:8 rwm", - - // /dev/pts/ - pts namespaces are "coming soon" - "c 136:* rwm", - "c 5:2 rwm", - - // tuntap - "c 10:200 rwm", - } - - for _, val := range allow { - if err := writeFile(dir, "devices.allow", val); err != nil { - return err - } - } - } - return nil -} - -func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) { - if c.Memory != 0 || c.MemorySwap != 0 { - dir, err := c.Join(cgroupRoot, "memory", pid) - if err != nil { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if c.Memory != 0 { - if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - } - // By default, MemorySwap is set to twice the size of RAM. - // If you want to omit MemorySwap, set it to `-1'. - if c.MemorySwap != -1 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil { - return err - } - } - } - return nil -} - -func (c *Cgroup) setupCpu(cgroupRoot string, pid int) (err error) { - // We always want to join the cpu group, to allow fair cpu scheduling - // on a container basis - dir, err := c.Join(cgroupRoot, "cpu", pid) - if err != nil { - return err - } - if c.CpuShares != 0 { - if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil { - return err - } - } - return nil +func (c *Cgroup) Apply(pid int) (ActiveCgroup, error) { + return rawApply(c, pid) } diff --git a/libcontainer/nsinit/exec.go b/libcontainer/nsinit/exec.go index 61286cc..a44faaf 100644 --- a/libcontainer/nsinit/exec.go +++ b/libcontainer/nsinit/exec.go @@ -3,6 +3,7 @@ package nsinit import ( + "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" @@ -61,10 +62,15 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ // Do this before syncing with child so that no children // can escape the cgroup ns.logger.Println("setting cgroups") - if err := ns.SetupCgroups(container, command.Process.Pid); err != nil { + activeCgroup, err := ns.SetupCgroups(container, command.Process.Pid) + if err != nil { command.Process.Kill() return -1, err } + if activeCgroup != nil { + defer activeCgroup.Cleanup() + } + ns.logger.Println("setting up network") if err := ns.InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { command.Process.Kill() @@ -85,13 +91,11 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ return status, err } -func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) error { +func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) { if container.Cgroups != nil { - if err := container.Cgroups.Apply(nspid); err != nil { - return err - } + return container.Cgroups.Apply(nspid) } - return nil + return nil, nil } func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {