diff --git a/cgroups/apply_nosystemd.go b/cgroups/apply_nosystemd.go deleted file mode 100644 index f94d475..0000000 --- a/cgroups/apply_nosystemd.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build !linux - -package cgroups - -import ( - "fmt" -) - -func useSystemd() bool { - return false -} - -func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { - return nil, fmt.Errorf("Systemd not supported") -} diff --git a/cgroups/apply_raw.go b/cgroups/apply_raw.go deleted file mode 100644 index 471d3fc..0000000 --- a/cgroups/apply_raw.go +++ /dev/null @@ -1,256 +0,0 @@ -package cgroups - -import ( - "fmt" - "os" - "path/filepath" - "strconv" -) - -type rawCgroup struct { - root string - cgroup string -} - -func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) { - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - // - // we can pick any subsystem to find the root - - cgroupRoot, err := FindCgroupMountpoint("cpu") - if err != nil { - return nil, err - } - cgroupRoot = filepath.Dir(cgroupRoot) - - if _, err := os.Stat(cgroupRoot); err != nil { - return nil, fmt.Errorf("cgroups fs not found") - } - - cgroup := c.Name - if c.Parent != "" { - cgroup = filepath.Join(c.Parent, cgroup) - } - - raw := &rawCgroup{ - root: cgroupRoot, - cgroup: cgroup, - } - for _, g := range []func(*Cgroup, int) error{ - raw.setupDevices, - raw.setupMemory, - raw.setupCpu, - raw.setupCpuset, - raw.setupCpuacct, - raw.setupBlkio, - raw.setupPerfevent, - raw.setupFreezer, - } { - if err := g(c, pid); err != nil { - return nil, err - } - } - - return raw, nil -} - -func (raw *rawCgroup) path(subsystem string) (string, error) { - initPath, err := GetInitCgroupDir(subsystem) - if err != nil { - return "", err - } - return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil -} - 
-func (raw *rawCgroup) join(subsystem string, pid int) (string, error) { - path, err := raw.path(subsystem) - if err != nil { - return "", err - } - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return "", err - } - if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil { - return "", err - } - return path, nil -} - -func (raw *rawCgroup) setupDevices(c *Cgroup, pid int) (err error) { - dir, err := raw.join("devices", pid) - if err != nil { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if !c.DeviceAccess { - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return err - } - - allow := []string{ - // allow mknod for any device - "c *:* m", - "b *:* m", - - // /dev/null, zero, full - "c 1:3 rwm", - "c 1:5 rwm", - "c 1:7 rwm", - - // consoles - "c 5:1 rwm", - "c 5:0 rwm", - "c 4:0 rwm", - "c 4:1 rwm", - - // /dev/urandom,/dev/random - "c 1:9 rwm", - "c 1:8 rwm", - - // /dev/pts/ - pts namespaces are "coming soon" - "c 136:* rwm", - "c 5:2 rwm", - - // tuntap - "c 10:200 rwm", - } - - for _, val := range allow { - if err := writeFile(dir, "devices.allow", val); err != nil { - return err - } - } - } - return nil -} - -func (raw *rawCgroup) setupMemory(c *Cgroup, pid int) (err error) { - dir, err := raw.join("memory", pid) - // only return an error for memory if it was not specified - if err != nil && (c.Memory != 0 || c.MemorySwap != 0) { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if c.Memory != 0 || c.MemorySwap != 0 { - if c.Memory != 0 { - if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { - return err - } - } - // By default, MemorySwap is set to twice the size of RAM. - // If you want to omit MemorySwap, set it to `-1'. 
- if c.MemorySwap != -1 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil { - return err - } - } - } - return nil -} - -func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) { - // We always want to join the cpu group, to allow fair cpu scheduling - // on a container basis - dir, err := raw.join("cpu", pid) - if err != nil { - return err - } - if c.CpuShares != 0 { - if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil { - return err - } - } - return nil -} - -func (raw *rawCgroup) setupCpuset(c *Cgroup, pid int) (err error) { - // we don't want to join this cgroup unless it is specified - if c.CpusetCpus != "" { - dir, err := raw.join("cpuset", pid) - if err != nil && c.CpusetCpus != "" { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if err := writeFile(dir, "cpuset.cpus", c.CpusetCpus); err != nil { - return err - } - } - return nil -} - -func (raw *rawCgroup) setupCpuacct(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("cpuacct", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupBlkio(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("blkio", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupPerfevent(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("perf_event", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) setupFreezer(c *Cgroup, pid int) error { - // we just want to join this group even though we don't set anything - if _, err := raw.join("freezer", pid); err != nil && err != ErrNotFound { - return err - } - return nil -} - -func (raw *rawCgroup) 
Cleanup() error { - get := func(subsystem string) string { - path, _ := raw.path(subsystem) - return path - } - - for _, path := range []string{ - get("memory"), - get("devices"), - get("cpu"), - get("cpuset"), - get("cpuacct"), - get("blkio"), - get("perf_event"), - get("freezer"), - } { - if path != "" { - os.RemoveAll(path) - } - } - return nil -} diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index e5e8f82..3aac971 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -1,14 +1,7 @@ package cgroups import ( - "bufio" "errors" - "github.com/dotcloud/docker/pkg/mount" - "io" - "io/ioutil" - "os" - "path/filepath" - "strings" ) var ( @@ -31,77 +24,3 @@ type Cgroup struct { type ActiveCgroup interface { Cleanup() error } - -// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt -func FindCgroupMountpoint(subsystem string) (string, error) { - mounts, err := mount.GetMounts() - if err != nil { - return "", err - } - - for _, mount := range mounts { - if mount.Fstype == "cgroup" { - for _, opt := range strings.Split(mount.VfsOpts, ",") { - if opt == subsystem { - return mount.Mountpoint, nil - } - } - } - } - return "", ErrNotFound -} - -// Returns the relative path to the cgroup docker is running in. 
-func GetThisCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/self/cgroup") - if err != nil { - return "", err - } - defer f.Close() - - return parseCgroupFile(subsystem, f) -} - -func GetInitCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/1/cgroup") - if err != nil { - return "", err - } - defer f.Close() - - return parseCgroupFile(subsystem, f) -} - -func parseCgroupFile(subsystem string, r io.Reader) (string, error) { - s := bufio.NewScanner(r) - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - text := s.Text() - parts := strings.Split(text, ":") - for _, subs := range strings.Split(parts[1], ",") { - if subs == subsystem { - return parts[2], nil - } - } - } - return "", ErrNotFound -} - -func writeFile(dir, file, data string) error { - return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) -} - -func (c *Cgroup) Apply(pid int) (ActiveCgroup, error) { - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - - if useSystemd() { - return systemdApply(c, pid) - } else { - return rawApply(c, pid) - } -} diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go new file mode 100644 index 0000000..60f318e --- /dev/null +++ b/cgroups/fs/apply_raw.go @@ -0,0 +1,146 @@ +package fs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/dotcloud/docker/pkg/cgroups" +) + +var ( + subsystems = map[string]subsystem{ + "devices": &devicesGroup{}, + "memory": &memoryGroup{}, + "cpu": &cpuGroup{}, + "cpuset": &cpusetGroup{}, + "cpuacct": &cpuacctGroup{}, + "blkio": &blkioGroup{}, + "perf_event": &perfEventGroup{}, + "freezer": &freezerGroup{}, + } +) + +type subsystem interface { + Set(*data) error + Remove(*data) error + Stats(*data) 
(map[string]float64, error) +} + +type data struct { + root string + cgroup string + c *cgroups.Cgroup + pid int +} + +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + for _, sys := range subsystems { + if err := sys.Set(d); err != nil { + return nil, err + } + } + return d, nil +} + +func GetStats(c *cgroups.Cgroup, subsystem string, pid int) (map[string]float64, error) { + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + sys, exists := subsystems[subsystem] + if !exists { + return nil, fmt.Errorf("subsystem %s does not exist", subsystem) + } + return sys.Stats(d) +} + +func (raw *data) path(subsystem string) (string, error) { + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil +} + +func (raw *data) join(subsystem string) (string, error) { + path, err := 
raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil { + return "", err + } + return path, nil +} + +func (raw *data) Cleanup() error { + for _, sys := range subsystems { + sys.Remove(raw) + } + return nil +} + +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} diff --git a/cgroups/fs/blkio.go b/cgroups/fs/blkio.go new file mode 100644 index 0000000..79e14fa --- /dev/null +++ b/cgroups/fs/blkio.go @@ -0,0 +1,121 @@ +package fs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/dotcloud/docker/pkg/cgroups" +) + +type blkioGroup struct { +} + +func (s *blkioGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("blkio"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *blkioGroup) Remove(d *data) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ +func (s *blkioGroup) Stats(d *data) (map[string]float64, error) { + var ( + paramData = make(map[string]float64) + params = []string{ + "io_service_bytes_recursive", + "io_serviced_recursive", + "io_queued_recursive", + } + ) + + path, err := d.path("blkio") + if err != nil { + return nil, 
err + } + + k, v, err := s.getSectors(path) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("blkio.sectors_recursive:%s", k)] = v + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("blkio.%s", param))) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.Fields(sc.Text()) + switch len(fields) { + case 3: + v, err := strconv.ParseFloat(fields[2], 64) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("%s:%s:%s", param, fields[0], fields[1])] = v + case 2: + // this is the total line, skip + default: + return nil, ErrNotValidFormat + } + } + } + return paramData, nil +} + +func (s *blkioGroup) getSectors(path string) (string, float64, error) { + f, err := os.Open(filepath.Join(path, "blkio.sectors_recursive")) + if err != nil { + return "", 0, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return "", 0, err + } + return getCgroupParamKeyValue(string(data)) +} diff --git a/cgroups/fs/cpu.go b/cgroups/fs/cpu.go new file mode 100644 index 0000000..8eb0c4f --- /dev/null +++ b/cgroups/fs/cpu.go @@ -0,0 +1,33 @@ +package fs + +import ( + "strconv" +) + +type cpuGroup struct { +} + +func (s *cpuGroup) Set(d *data) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := d.join("cpu") + if err != nil { + return err + } + if d.c.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil { + return err + } + } + return nil +} + +func (s *cpuGroup) Remove(d *data) error { + return removePath(d.path("cpu")) +} + +func (s *cpuGroup) Stats(d *data) (map[string]float64, error) { + // we can reuse the cpuacct subsystem to get the cpu stats + sys := subsystems["cpuacct"] + return sys.Stats(d) +} diff --git a/cgroups/fs/cpuacct.go b/cgroups/fs/cpuacct.go new file mode 100644 
index 0000000..4ea2b1f --- /dev/null +++ b/cgroups/fs/cpuacct.go @@ -0,0 +1,131 @@ +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "time" + + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/system" +) + +var ( + cpuCount = float64(runtime.NumCPU()) + clockTicks = float64(system.GetClockTicks()) +) + +type cpuacctGroup struct { +} + +func (s *cpuacctGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *cpuacctGroup) Remove(d *data) error { + return removePath(d.path("cpuacct")) +} + +func (s *cpuacctGroup) Stats(d *data) (map[string]float64, error) { + var ( + startCpu, lastCpu, startSystem, lastSystem float64 + percentage float64 + paramData = make(map[string]float64) + ) + path, err := d.path("cpuacct") + if startCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if startSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + // sample for 100ms + time.Sleep(100 * time.Millisecond) + if lastCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if lastSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + + var ( + deltaProc = lastCpu - startCpu + deltaSystem = lastSystem - startSystem + ) + if deltaSystem > 0.0 { + percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount + } + // NOTE: a percentage over 100% is valid for POSIX because that means the + // processes is using multiple cores + paramData["percentage"] = percentage + return paramData, nil +} + +func (s *cpuacctGroup) getProcStarttime(d *data) (float64, error) { + rawStart, err := system.GetProcessStartTime(d.pid) + if err != nil { + return 0, err + } + return strconv.ParseFloat(rawStart, 64) +} + +func (s *cpuacctGroup) getSystemCpuUsage(d *data) (float64, error) { + + f, 
err := os.Open("/proc/stat") + if err != nil { + return 0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.Fields(sc.Text()) + switch parts[0] { + case "cpu": + if len(parts) < 8 { + return 0, fmt.Errorf("invalid number of cpu fields") + } + + var total float64 + for _, i := range parts[1:8] { + v, err := strconv.ParseFloat(i, 64) + if err != nil { + return 0.0, fmt.Errorf("Unable to convert value %s to float: %s", i, err) + } + total += v + } + return total, nil + default: + continue + } + } + return 0, fmt.Errorf("invalid stat format") +} + +func (s *cpuacctGroup) getCpuUsage(d *data, path string) (float64, error) { + cpuTotal := 0.0 + f, err := os.Open(filepath.Join(path, "cpuacct.stat")) + if err != nil { + return 0.0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + _, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return 0.0, err + } + // set the raw data in map + cpuTotal += v + } + return cpuTotal, nil +} diff --git a/cgroups/fs/cpuset.go b/cgroups/fs/cpuset.go new file mode 100644 index 0000000..8a13c56 --- /dev/null +++ b/cgroups/fs/cpuset.go @@ -0,0 +1,36 @@ +package fs + +import ( + "os" +) + +type cpusetGroup struct { +} + +func (s *cpusetGroup) Set(d *data) error { + // we don't want to join this cgroup unless it is specified + if d.c.CpusetCpus != "" { + dir, err := d.join("cpuset") + if err != nil && d.c.CpusetCpus != "" { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { + return err + } + } + return nil +} + +func (s *cpusetGroup) Remove(d *data) error { + return removePath(d.path("cpuset")) +} + +func (s *cpusetGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/cgroups/fs/devices.go b/cgroups/fs/devices.go new file mode 100644 index 0000000..a2f91ed --- /dev/null +++ b/cgroups/fs/devices.go @@ -0,0 +1,69 @@ 
+package fs + +import ( + "os" +) + +type devicesGroup struct { +} + +func (s *devicesGroup) Set(d *data) error { + dir, err := d.join("devices") + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if !d.c.DeviceAccess { + if err := writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // allow mknod for any device + "c *:* m", + "b *:* m", + + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 rwm", + } + + for _, val := range allow { + if err := writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + return nil +} + +func (s *devicesGroup) Remove(d *data) error { + return removePath(d.path("devices")) +} + +func (s *devicesGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/cgroups/fs/freezer.go b/cgroups/fs/freezer.go new file mode 100644 index 0000000..ebf5bb9 --- /dev/null +++ b/cgroups/fs/freezer.go @@ -0,0 +1,62 @@ +package fs + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/cgroups" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" +) + +type freezerGroup struct { +} + +func (s *freezerGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("freezer"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *freezerGroup) Remove(d *data) error { + return removePath(d.path("freezer")) +} + +func (s *freezerGroup) Stats(d *data) (map[string]float64, error) { + var ( + paramData = make(map[string]float64) + params = []string{ + "parent_freezing", + "self_freezing", + // comment out right now because this is string "state", + } + 
) + + path, err := d.path("freezer") + if err != nil { + return nil, err + } + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("freezer.%s", param))) + if err != nil { + return nil, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + + v, err := strconv.ParseFloat(strings.TrimSuffix(string(data), "\n"), 64) + if err != nil { + return nil, err + } + paramData[param] = v + } + return paramData, nil +} diff --git a/cgroups/fs/memory.go b/cgroups/fs/memory.go new file mode 100644 index 0000000..cf4bf5a --- /dev/null +++ b/cgroups/fs/memory.go @@ -0,0 +1,71 @@ +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" +) + +type memoryGroup struct { +} + +func (s *memoryGroup) Set(d *data) error { + dir, err := d.join("memory") + // only return an error for memory if it was not specified + if err != nil && (d.c.Memory != 0 || d.c.MemorySwap != 0) { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if d.c.Memory != 0 || d.c.MemorySwap != 0 { + if d.c.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { + return err + } + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { + return err + } + } + // By default, MemorySwap is set to twice the size of RAM. + // If you want to omit MemorySwap, set it to `-1'. 
+ if d.c.MemorySwap != -1 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil { + return err + } + } + } + return nil +} + +func (s *memoryGroup) Remove(d *data) error { + return removePath(d.path("memory")) +} + +func (s *memoryGroup) Stats(d *data) (map[string]float64, error) { + paramData := make(map[string]float64) + path, err := d.path("memory") + if err != nil { + return nil, err + } + + f, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return nil, err + } + paramData[t] = v + } + return paramData, nil +} diff --git a/cgroups/fs/perf_event.go b/cgroups/fs/perf_event.go new file mode 100644 index 0000000..789b3e5 --- /dev/null +++ b/cgroups/fs/perf_event.go @@ -0,0 +1,24 @@ +package fs + +import ( + "github.com/dotcloud/docker/pkg/cgroups" +) + +type perfEventGroup struct { +} + +func (s *perfEventGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *perfEventGroup) Remove(d *data) error { + return removePath(d.path("perf_event")) +} + +func (s *perfEventGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/cgroups/fs/utils.go b/cgroups/fs/utils.go new file mode 100644 index 0000000..f4c4846 --- /dev/null +++ b/cgroups/fs/utils.go @@ -0,0 +1,29 @@ +package fs + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +var ( + ErrNotSupportStat = errors.New("stats are not supported for subsystem") + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Parses a cgroup param and returns as name, value +// i.e. 
"io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, float64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return "", 0.0, fmt.Errorf("Unable to convert param value to float: %s", err) + } + return parts[0], value, nil + default: + return "", 0.0, ErrNotValidFormat + } +} diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 0000000..226aa59 --- /dev/null +++ b/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,16 @@ +// +build !linux + +package systemd + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/cgroups" +) + +func UseSystemd() bool { + return false +} + +func systemdApply(c *Cgroup, pid int) (cgroups.ActiveCgroup, error) { + return nil, fmt.Errorf("Systemd not supported") +} diff --git a/cgroups/apply_systemd.go b/cgroups/systemd/apply_systemd.go similarity index 86% rename from cgroups/apply_systemd.go rename to cgroups/systemd/apply_systemd.go index a9b3a8d..7c26080 100644 --- a/cgroups/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -1,27 +1,35 @@ // +build linux -package cgroups +package systemd import ( "fmt" - systemd1 "github.com/coreos/go-systemd/dbus" - "github.com/dotcloud/docker/pkg/systemd" - "github.com/godbus/dbus" + "io/ioutil" "path/filepath" "strings" "sync" + + systemd1 "github.com/coreos/go-systemd/dbus" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/systemd" + "github.com/godbus/dbus" ) type systemdCgroup struct { } +type DeviceAllow struct { + Node string + Permissions string +} + var ( connLock sync.Mutex theConn *systemd1.Conn hasStartTransientUnit bool ) -func useSystemd() bool { +func UseSystemd() bool { if !systemd.SdBooted() { return false } @@ -48,15 +56,9 @@ func useSystemd() bool { } } } - return hasStartTransientUnit } -type DeviceAllow struct { - Node string - Permissions string -} 
- func getIfaceForUnit(unitName string) string { if strings.HasSuffix(unitName, ".scope") { return "Scope" @@ -67,11 +69,12 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { - unitName := c.Parent + "-" + c.Name + ".scope" - slice := "system.slice" - - var properties []systemd1.Property +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + var ( + unitName = c.Parent + "-" + c.Name + ".scope" + slice = "system.slice" + properties []systemd1.Property + ) for _, v := range c.UnitProperties { switch v[0] { @@ -85,7 +88,8 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { properties = append(properties, systemd1.Property{"Slice", dbus.MakeVariant(slice)}, systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, - systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}) + systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, + ) if !c.DeviceAccess { properties = append(properties, @@ -138,7 +142,7 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { cgroup := props["ControlGroup"].(string) if !c.DeviceAccess { - mountpoint, err := FindCgroupMountpoint("devices") + mountpoint, err := cgroups.FindCgroupMountpoint("devices") if err != nil { return nil, err } @@ -146,15 +150,14 @@ func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) { path := filepath.Join(mountpoint, cgroup) // /dev/pts/* - if err := writeFile(path, "devices.allow", "c 136:* rwm"); err != nil { + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 136:* rwm"), 0700); err != nil { return nil, err } // tuntap - if err := writeFile(path, "devices.allow", "c 10:200 rwm"); err != nil { + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 10:200 rwm"), 0700); err != nil { return nil, err } } - return &systemdCgroup{}, nil } diff --git a/cgroups/utils.go b/cgroups/utils.go new file mode 
100644 index 0000000..02a7f35 --- /dev/null +++ b/cgroups/utils.go @@ -0,0 +1,67 @@ +package cgroups + +import ( + "bufio" + "io" + "os" + "strings" + + "github.com/dotcloud/docker/pkg/mount" +) + +// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + mounts, err := mount.GetMounts() + if err != nil { + return "", err + } + + for _, mount := range mounts { + if mount.Fstype == "cgroup" { + for _, opt := range strings.Split(mount.VfsOpts, ",") { + if opt == subsystem { + return mount.Mountpoint, nil + } + } + } + } + return "", ErrNotFound +} + +// Returns the relative path to the cgroup docker is running in. +func GetThisCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +func GetInitCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +func parseCgroupFile(subsystem string, r io.Reader) (string, error) { + s := bufio.NewScanner(r) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + text := s.Text() + parts := strings.Split(text, ":") + for _, subs := range strings.Split(parts[1], ",") { + if subs == subsystem { + return parts[2], nil + } + } + } + return "", ErrNotFound +} diff --git a/libcontainer/nsinit/exec.go b/libcontainer/nsinit/exec.go index c07c45d..e76e060 100644 --- a/libcontainer/nsinit/exec.go +++ b/libcontainer/nsinit/exec.go @@ -8,6 +8,8 @@ import ( "syscall" "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/cgroups/fs" + "github.com/dotcloud/docker/pkg/cgroups/systemd" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" @@ -99,7 +101,11 @@ func (ns *linuxNs) Exec(container 
*libcontainer.Container, term Terminal, args [ func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) { if container.Cgroups != nil { - return container.Cgroups.Apply(nspid) + c := container.Cgroups + if systemd.UseSystemd() { + return systemd.Apply(c, nspid) + } + return fs.Apply(c, nspid) } return nil, nil } diff --git a/system/sysconfig.go b/system/sysconfig.go new file mode 100644 index 0000000..dcbe6c9 --- /dev/null +++ b/system/sysconfig.go @@ -0,0 +1,13 @@ +// +build linux,cgo + +package system + +/* +#include <unistd.h> +int get_hz(void) { return sysconf(_SC_CLK_TCK); } +*/ +import "C" + +func GetClockTicks() int { + return int(C.get_hz()) +} diff --git a/system/sysconfig_nocgo.go b/system/sysconfig_nocgo.go new file mode 100644 index 0000000..7ca3488 --- /dev/null +++ b/system/sysconfig_nocgo.go @@ -0,0 +1,9 @@ +// +build linux,!cgo + +package system + +func GetClockTicks() int { + // when we cannot call out to C to get the sysconf it is fairly safe to + // just return 100 + return 100 +} diff --git a/system/unsupported.go b/system/unsupported.go index c52a1e5..4ae2a48 100644 --- a/system/unsupported.go +++ b/system/unsupported.go @@ -17,3 +17,9 @@ func UsetCloseOnExec(fd uintptr) error { func Gettid() int { return 0 } + +func GetClockTicks() int { + // when we cannot call out to C to get the sysconf it is fairly safe to + // just return 100 + return 100 +}