From dd2b4a8907e578dbba97666cee8fa15945d3096b Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Tue, 29 Aug 2017 17:52:05 +0200 Subject: [PATCH 1/3] server: container_create: fix cgroupfs scopes naming Signed-off-by: Antonio Murdaca --- server/container_create.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/server/container_create.go b/server/container_create.go index 91a2370f..713ab823 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -471,14 +471,22 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, specgen.SetProcessOOMScoreAdj(int(oomScoreAdj)) } - if sb.CgroupParent() != "" { - if s.config.CgroupManager == "systemd" { - cgPath := sb.CgroupParent() + ":" + "crio" + ":" + containerID - specgen.SetLinuxCgroupsPath(cgPath) - } else { - specgen.SetLinuxCgroupsPath(sb.CgroupParent() + "/" + containerID) - } + var cgPath string + scopePrefix := "crio" + parent := "/crio" + useSystemd := s.config.CgroupManager == "systemd" + if useSystemd { + parent = "system.slice" } + if sb.CgroupParent() != "" { + parent = sb.CgroupParent() + } + if useSystemd { + cgPath = parent + ":" + scopePrefix + ":" + containerID + } else { + cgPath = filepath.Join(parent, scopePrefix+"-"+containerID) + } + specgen.SetLinuxCgroupsPath(cgPath) capabilities := linux.GetSecurityContext().GetCapabilities() toCAPPrefixed := func(cap string) string { From c199f63dbaffb0a55b8683992a7202af62138781 Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Tue, 29 Aug 2017 19:09:59 +0200 Subject: [PATCH 2/3] oci: join crio-conmon for cgroupfs Signed-off-by: Antonio Murdaca --- libkpod/sandbox/sandbox.go | 5 + oci/oci.go | 19 +- server/container_create.go | 1 + vendor.conf | 1 + vendor/github.com/containerd/cgroups/LICENSE | 201 +++++++++ .../github.com/containerd/cgroups/README.md | 112 +++++ vendor/github.com/containerd/cgroups/blkio.go | 323 +++++++++++++++ .../github.com/containerd/cgroups/cgroup.go | 389 ++++++++++++++++++ .../github.com/containerd/cgroups/control.go | 58 +++ vendor/github.com/containerd/cgroups/cpu.go | 120 ++++++ .../github.com/containerd/cgroups/cpuacct.go | 112 +++++ .../github.com/containerd/cgroups/cpuset.go | 139 +++++++ .../github.com/containerd/cgroups/devices.go | 74 ++++ .../github.com/containerd/cgroups/errors.go | 31 ++ .../github.com/containerd/cgroups/freezer.go | 69 ++++ .../containerd/cgroups/hierarchy.go | 4 + .../github.com/containerd/cgroups/hugetlb.go | 92 +++++ .../github.com/containerd/cgroups/memory.go | 304 ++++++++++++++ vendor/github.com/containerd/cgroups/named.go | 23 ++ .../github.com/containerd/cgroups/net_cls.go | 42 ++ .../github.com/containerd/cgroups/net_prio.go | 50 +++ vendor/github.com/containerd/cgroups/paths.go | 88 ++++ .../containerd/cgroups/perf_event.go | 21 + vendor/github.com/containerd/cgroups/pids.go | 69 ++++ vendor/github.com/containerd/cgroups/state.go | 12 + vendor/github.com/containerd/cgroups/stats.go | 109 +++++ .../containerd/cgroups/subsystem.go | 94 +++++ .../github.com/containerd/cgroups/systemd.go | 101 +++++ vendor/github.com/containerd/cgroups/ticks.go | 10 + vendor/github.com/containerd/cgroups/utils.go | 280 +++++++++++++ vendor/github.com/containerd/cgroups/v1.go | 65 +++ 31 files changed, 3013 insertions(+), 5 deletions(-) create mode 100644 vendor/github.com/containerd/cgroups/LICENSE create mode 100644 vendor/github.com/containerd/cgroups/README.md create mode 100644 vendor/github.com/containerd/cgroups/blkio.go create mode 100644 vendor/github.com/containerd/cgroups/cgroup.go create mode 100644 vendor/github.com/containerd/cgroups/control.go create mode 100644 vendor/github.com/containerd/cgroups/cpu.go create mode 100644 vendor/github.com/containerd/cgroups/cpuacct.go create mode 100644 vendor/github.com/containerd/cgroups/cpuset.go create mode 100644 vendor/github.com/containerd/cgroups/devices.go create mode 100644 vendor/github.com/containerd/cgroups/errors.go create mode 100644 vendor/github.com/containerd/cgroups/freezer.go create mode 100644 vendor/github.com/containerd/cgroups/hierarchy.go create mode 100644 vendor/github.com/containerd/cgroups/hugetlb.go create mode 100644 vendor/github.com/containerd/cgroups/memory.go create mode 100644 vendor/github.com/containerd/cgroups/named.go create mode 100644 vendor/github.com/containerd/cgroups/net_cls.go create mode 100644 vendor/github.com/containerd/cgroups/net_prio.go create mode 100644 vendor/github.com/containerd/cgroups/paths.go create mode 100644 vendor/github.com/containerd/cgroups/perf_event.go create mode 100644 vendor/github.com/containerd/cgroups/pids.go create mode 100644 vendor/github.com/containerd/cgroups/state.go create mode 100644 vendor/github.com/containerd/cgroups/stats.go create mode 100644 vendor/github.com/containerd/cgroups/subsystem.go create mode 100644 vendor/github.com/containerd/cgroups/systemd.go create mode 100644 vendor/github.com/containerd/cgroups/ticks.go create mode 100644 vendor/github.com/containerd/cgroups/utils.go create mode 100644 vendor/github.com/containerd/cgroups/v1.go diff --git a/libkpod/sandbox/sandbox.go b/libkpod/sandbox/sandbox.go index 992ff6c9..a3ba4010 100644 --- a/libkpod/sandbox/sandbox.go +++ b/libkpod/sandbox/sandbox.go @@ -273,6 +273,11 @@ func (s *Sandbox) CgroupParent() string { return s.cgroupParent } +// UpdateCgroupParent updates the cgroup parent for a sandbox +func (s *Sandbox) UpdateCgroupParent(parent string) { + s.cgroupParent = parent +} + // Privileged returns whether or not the containers in the sandbox are // privileged containers func (s *Sandbox) Privileged() bool { diff --git a/oci/oci.go b/oci/oci.go index 17f884be..d9f9e0b4 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -13,6 +13,7 @@ import ( "syscall" "time" + "github.com/containerd/cgroups" "github.com/kubernetes-incubator/cri-o/utils" rspec "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -186,11 +187,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { childStartPipe.Close() // Move conmon to specified cgroup - if cgroupParent != "" { - if r.cgroupManager == "systemd" { - logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id)) - if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil { - logrus.Warnf("Failed to add conmon to sandbox cgroup: %v", err) + if r.cgroupManager == "systemd" { + logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id)) + if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/crio-conmon-"+c.id)), &rspec.LinuxResources{}) + if err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + defer control.Delete() + if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) } } } diff --git a/server/container_create.go b/server/container_create.go index 713ab823..6c7c7754 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -487,6 +487,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, cgPath = filepath.Join(parent, scopePrefix+"-"+containerID) } specgen.SetLinuxCgroupsPath(cgPath) + sb.UpdateCgroupParent(parent) capabilities := linux.GetSecurityContext().GetCapabilities() toCAPPrefixed := func(cap string) string { diff --git a/vendor.conf b/vendor.conf index 363d1e4a..d768a92d 100644 --- a/vendor.conf +++ b/vendor.conf @@ -96,3 +96,4 @@ github.com/prometheus/common 13ba4ddd0caa9c28ca7b7bffe1dfa9ed8d5ef207 github.com/prometheus/procfs 65c1f6f8f0fc1e2185eb9863a3bc751496404259 github.com/matttproud/golang_protobuf_extensions fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4 +github.com/containerd/cgroups 7a5fdd8330119dc70d850260db8f3594d89d6943 diff --git a/vendor/github.com/containerd/cgroups/LICENSE b/vendor/github.com/containerd/cgroups/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/github.com/containerd/cgroups/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/containerd/cgroups/README.md b/vendor/github.com/containerd/cgroups/README.md new file mode 100644 index 00000000..69e932a9 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/README.md @@ -0,0 +1,112 @@ +# cgroups + +[![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups) + +[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups) + +Go package for creating, managing, inspecting, and destroying cgroups. +The resources format for settings on the cgroup uses the OCI runtime-spec found +[here](https://github.com/opencontainers/runtime-spec). + +## Examples + +### Create a new cgroup + +This creates a new cgroup using a static path for all subsystems under `/test`. + +* /sys/fs/cgroup/cpu/test +* /sys/fs/cgroup/memory/test +* etc.... + +It uses a single hierarchy and specifies cpu shares as a resource constraint and +uses the v1 implementation of cgroups. + + +```go +shares := uint64(100) +control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}) +defer control.Delete() +``` + +### Create with systemd slice support + + +```go +control, err := cgroups.New(cgroups.Systemd, cgroups.Slice("system.slice", "runc-test"), &specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}) + +``` + +### Load an existing cgroup + +```go +control, err = cgroups.Load(cgroups.V1, cgroups.StaticPath("/test")) +``` + +### Add a process to the cgroup + +```go +if err := control.Add(cgroups.Process{Pid:1234}); err != nil { +} +``` + +### Update the cgroup + +To update the resources applied in the cgroup + +```go +shares = uint64(200) +if err := control.Update(&specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}); err != nil { +} +``` + +### Freeze and Thaw the cgroup + +```go +if err := control.Freeze(); err != nil { +} +if err := control.Thaw(); err != nil { +} +``` + +### List all processes in the cgroup or recursively + +```go +processes, err := control.Processes(cgroups.Devices, recursive) +``` + +### Get Stats on the cgroup + +```go +stats, err := control.Stat() +``` + +By adding `cgroups.IgnoreNotExist` all non-existent files will be ignored, e.g. swap memory stats without swap enabled +```go +stats, err := control.Stat(cgroups.IgnoreNotExist) +``` + +### Move process across cgroups + +This allows you to take processes from one cgroup and move them to another. + +```go +err := control.MoveTo(destination) +``` + +### Create subcgroup + +```go +subCgroup, err := control.New("child", resources) +``` diff --git a/vendor/github.com/containerd/cgroups/blkio.go b/vendor/github.com/containerd/cgroups/blkio.go new file mode 100644 index 00000000..e1fd73ce --- /dev/null +++ b/vendor/github.com/containerd/cgroups/blkio.go @@ -0,0 +1,323 @@ +package cgroups + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewBlkio(root string) *blkioController { + return &blkioController{ + root: filepath.Join(root, string(Blkio)), + } +} + +type blkioController struct { + root string +} + +func (b *blkioController) Name() Name { + return Blkio +} + +func (b *blkioController) Path(path string) string { + return filepath.Join(b.root, path) +} + +func (b *blkioController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(b.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.BlockIO == nil { + return nil + } + for _, t := range createBlkioSettings(resources.BlockIO) { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", t.name)), + t.format(t.value), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +func (b *blkioController) Update(path string, resources *specs.LinuxResources) error { + return b.Create(path, resources) +} + +func (b *blkioController) Stat(path string, stats *Stats) error { + stats.Blkio = &BlkioStat{} + settings := []blkioStatSettings{ + { + name: "throttle.io_serviced", + entry: &stats.Blkio.IoServicedRecursive, + }, + { + name: "throttle.io_service_bytes", + entry: &stats.Blkio.IoServiceBytesRecursive, + }, + } + // Try to read CFQ stats available on all CFQ enabled kernels first + if _, err := os.Lstat(filepath.Join(b.Path(path), fmt.Sprintf("blkio.io_serviced_recursive"))); err == nil { + settings = append(settings, + blkioStatSettings{ + name: "sectors_recursive", + entry: &stats.Blkio.SectorsRecursive, + }, + blkioStatSettings{ + name: "io_service_bytes_recursive", + entry: &stats.Blkio.IoServiceBytesRecursive, + }, + blkioStatSettings{ + name: "io_serviced_recursive", + entry: &stats.Blkio.IoServicedRecursive, + }, + blkioStatSettings{ + name: "io_queued_recursive", + entry: &stats.Blkio.IoQueuedRecursive, + }, + blkioStatSettings{ + name: "io_service_time_recursive", + entry: &stats.Blkio.IoServiceTimeRecursive, + }, + blkioStatSettings{ + name: "io_wait_time_recursive", + entry: &stats.Blkio.IoWaitTimeRecursive, + }, + blkioStatSettings{ + name: "io_merged_recursive", + entry: &stats.Blkio.IoMergedRecursive, + }, + blkioStatSettings{ + name: "time_recursive", + entry: &stats.Blkio.IoTimeRecursive, + }, + ) + } + + devices, err := getDevices("/dev") + if err != nil { + return err + } + + for _, t := range settings { + if err := b.readEntry(devices, path, t.name, t.entry); err != nil { + return err + } + } + return nil +} + +func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]BlkioEntry) error { + f, err := os.Open(filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", name))) + if err != nil { + return err + } + defer f.Close() + sc := bufio.NewScanner(f) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + major, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return err + } + minor, err := strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return err + } + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err := strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return err + } + *entry = append(*entry, BlkioEntry{ + Device: devices[deviceKey{major, minor}], + Major: major, + Minor: minor, + Op: op, + Value: v, + }) + } + return nil +} + +func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings { + settings := []blkioSettings{ + { + name: "weight", + value: blkio.Weight, + format: uintf, + }, + { + name: "leaf_weight", + value: blkio.LeafWeight, + format: uintf, + }, + } + for _, wd := range blkio.WeightDevice { + settings = append(settings, + blkioSettings{ + name: "weight_device", + value: wd, + format: weightdev, + }, + blkioSettings{ + name: "leaf_weight_device", + value: wd, + format: weightleafdev, + }) + } + for _, t := range []struct { + name string + list []specs.LinuxThrottleDevice + }{ + { + name: "throttle.read_bps_device", + list: blkio.ThrottleReadBpsDevice, + }, + { + name: "throttle.read_iops_device", + list: blkio.ThrottleReadIOPSDevice, + }, + { + name: "throttle.write_bps_device", + list: blkio.ThrottleWriteBpsDevice, + }, + { + name: "throttle.write_iops_device", + list: blkio.ThrottleWriteIOPSDevice, + }, + } { + for _, td := range t.list { + settings = append(settings, blkioSettings{ + name: t.name, + value: td, + format: throttleddev, + }) + } + } + return settings +} + +type blkioSettings struct { + name string + value interface{} + format func(v interface{}) []byte +} + +type blkioStatSettings struct { + name string + entry *[]BlkioEntry +} + +func uintf(v interface{}) []byte { + return []byte(strconv.FormatUint(uint64(*v.(*uint16)), 10)) +} + +func weightdev(v interface{}) []byte { + wd := v.(specs.LinuxWeightDevice) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)) +} + +func weightleafdev(v interface{}) []byte { + wd := v.(specs.LinuxWeightDevice) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)) +} + +func throttleddev(v interface{}) []byte { + td := v.(specs.LinuxThrottleDevice) + return []byte(fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)) +} + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +type deviceKey struct { + major, minor uint64 +} + +// getDevices makes a best effort attempt to read all the devices into a map +// keyed by major and minor number. Since devices may be mapped multiple times, +// we err on taking the first occurrence. +func getDevices(path string) (map[deviceKey]string, error) { + // TODO(stevvooe): We are ignoring lots of errors. It might be kind of + // challenging to debug this if we aren't mapping devices correctly. + // Consider logging these errors. + devices := map[deviceKey]string{} + if err := filepath.Walk(path, func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + switch { + case fi.IsDir(): + switch fi.Name() { + case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts": + return filepath.SkipDir + default: + return nil + } + case fi.Name() == "console": + return nil + default: + if fi.Mode()&os.ModeDevice == 0 { + // skip non-devices + return nil + } + + st, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return fmt.Errorf("%s: unable to convert to system stat", p) + } + + key := deviceKey{major(st.Rdev), minor(st.Rdev)} + if _, ok := devices[key]; ok { + return nil // skip it if we have already populated the path. + } + + devices[key] = p + } + + return nil + }); err != nil { + return nil, err + } + + return devices, nil +} + +func major(devNumber uint64) uint64 { + return (devNumber >> 8) & 0xfff +} + +func minor(devNumber uint64) uint64 { + return (devNumber & 0xff) | ((devNumber >> 12) & 0xfff00) +} diff --git a/vendor/github.com/containerd/cgroups/cgroup.go b/vendor/github.com/containerd/cgroups/cgroup.go new file mode 100644 index 00000000..74a324f9 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cgroup.go @@ -0,0 +1,389 @@ +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// New returns a new control via the cgroup cgroups interface +func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources) (Cgroup, error) { + subsystems, err := hierarchy() + if err != nil { + return nil, err + } + for _, s := range subsystems { + if err := initializeSubsystem(s, path, resources); err != nil { + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: subsystems, + }, nil +} + +// Load will load an existing cgroup and allow it to be controlled +func Load(hierarchy Hierarchy, path Path) (Cgroup, error) { + subsystems, err := hierarchy() + if err != nil { + return nil, err + } + // check the the subsystems still exist + for _, s := range pathers(subsystems) { + p, err := path(s.Name()) + if err != nil { + return nil, err + } + if _, err := os.Lstat(s.Path(p)); err != nil { + if os.IsNotExist(err) { + return nil, ErrCgroupDeleted + } + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: subsystems, + }, nil +} + +type cgroup struct { + path Path + + subsystems []Subsystem + mu sync.Mutex + err error +} + +// New returns a new sub cgroup +func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + path := subPath(c.path, name) + for _, s := range c.subsystems { + if err := initializeSubsystem(s, path, resources); err != nil { + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: c.subsystems, + }, nil +} + +// Subsystems returns all the subsystems that are currently being +// consumed by the group +func (c *cgroup) Subsystems() []Subsystem { + return c.subsystems +} + +// Add moves the provided process into the new cgroup +func (c *cgroup) Add(process Process) error { + if process.Pid <= 0 { + return ErrInvalidPid + } + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + return c.add(process) +} + +func (c *cgroup) add(process Process) error { + for _, s := range pathers(c.subsystems) { + p, err := c.path(s.Name()) + if err != nil { + return err + } + if err := ioutil.WriteFile( + filepath.Join(s.Path(p), cgroupProcs), + []byte(strconv.Itoa(process.Pid)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +// Delete will remove the control group from each of the subsystems registered +func (c *cgroup) Delete() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + var errors []string + for _, s := range c.subsystems { + if d, ok := s.(deleter); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + if err := d.Delete(sp); err != nil { + errors = append(errors, string(s.Name())) + } + continue + } + if p, ok := s.(pather); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + path := p.Path(sp) + if err := remove(path); err != nil { + errors = append(errors, path) + } + } + } + if len(errors) > 0 { + return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(errors, ", ")) + } + c.err = ErrCgroupDeleted + return nil +} + +// Stat returns the current stats for the cgroup +func (c *cgroup) Stat(handlers ...ErrorHandler) (*Stats, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + if len(handlers) == 0 { + handlers = append(handlers, errPassthrough) + } + var ( + stats = &Stats{} + wg = &sync.WaitGroup{} + errs = make(chan error, len(c.subsystems)) + ) + for _, s := range c.subsystems { + if ss, ok := s.(stater); ok { + sp, err := c.path(s.Name()) + if err != nil { + return nil, err + } + wg.Add(1) + go func() { + defer wg.Done() + if err := ss.Stat(sp, stats); err != nil { + for _, eh := range handlers { + if herr := eh(err); herr != nil { + errs <- herr + } + } + } + }() + } + } + wg.Wait() + close(errs) + for err := range errs { + return nil, err + } + return stats, nil +} + +// Update updates the cgroup with the new resource values provided +// +// Be prepared to handle EBUSY when trying to update a cgroup with +// live processes and other operations like Stats being performed at the +// same time +func (c *cgroup) Update(resources *specs.LinuxResources) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + for _, s := range c.subsystems { + if u, ok := s.(updater); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + if err := u.Update(sp, resources); err != nil { + return err + } + } + } + return nil +} + +// Processes returns the processes running inside the cgroup along +// with the subsystem used, pid, and path +func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + return c.processes(subsystem, recursive) +} + +func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) { + s := c.getSubsystem(subsystem) + sp, err := c.path(subsystem) + if err != nil { + return nil, err + } + path := s.(pather).Path(sp) + var processes []Process + err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !recursive && info.IsDir() { + if p == path { + return nil + } + return filepath.SkipDir + } + dir, name := filepath.Split(p) + if name != cgroupProcs { + return nil + } + procs, err := readPids(dir, subsystem) + if err != nil { + return err + } + processes = append(processes, procs...) + return nil + }) + return processes, err +} + +// Freeze freezes the entire cgroup and all the processes inside it +func (c *cgroup) Freeze() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + sp, err := c.path(Freezer) + if err != nil { + return err + } + return s.(*freezerController).Freeze(sp) +} + +// Thaw thaws out the cgroup and all the processes inside it +func (c *cgroup) Thaw() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + sp, err := c.path(Freezer) + if err != nil { + return err + } + return s.(*freezerController).Thaw(sp) +} + +// OOMEventFD returns the memory cgroup's out of memory event fd that triggers +// when processes inside the cgroup receive an oom event +func (c *cgroup) OOMEventFD() (uintptr, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return 0, c.err + } + s := c.getSubsystem(Memory) + if s == nil { + return 0, ErrMemoryNotSupported + } + sp, err := c.path(Memory) + if err != nil { + return 0, err + } + return s.(*memoryController).OOMEventFD(sp) +} + +// State returns the state of the cgroup and its processes +func (c *cgroup) State() State { + c.mu.Lock() + defer c.mu.Unlock() + c.checkExists() + if c.err != nil && c.err == ErrCgroupDeleted { + return Deleted + } + s := c.getSubsystem(Freezer) + if s == nil { + return Thawed + } + sp, err := c.path(Freezer) + if err != nil { + return Unknown + } + state, err := s.(*freezerController).state(sp) + if err != nil { + return Unknown + } + return state +} + +// MoveTo does a recursive move subsystem by subsystem of all the processes +// inside the group +func (c *cgroup) MoveTo(destination Cgroup) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + for _, s := range c.subsystems { + processes, err := c.processes(s.Name(), true) + if err != nil { + return err + } + for _, p := range processes { + if err := destination.Add(p); err != nil { + return err + } + } + } + return nil +} + +func (c *cgroup) getSubsystem(n Name) Subsystem { + for _, s := range c.subsystems { + if s.Name() == n { + return s + } + } + return nil +} + +func (c *cgroup) checkExists() { + for _, s := range pathers(c.subsystems) { + p, err := c.path(s.Name()) + if err != nil { + return + } + if _, err := os.Lstat(s.Path(p)); err != nil { + if os.IsNotExist(err) { + c.err = ErrCgroupDeleted + return + } + } + } +} diff --git a/vendor/github.com/containerd/cgroups/control.go b/vendor/github.com/containerd/cgroups/control.go new file mode 100644 index 00000000..c83b2417 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/control.go @@ -0,0 +1,58 @@ +package cgroups + +import ( + "os" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + cgroupProcs = "cgroup.procs" + defaultDirPerm = 0755 +) + +// defaultFilePerm is a var so that the test framework can change the filemode +// of all files created when the tests are running. The difference between the +// tests and real world use is that files like "cgroup.procs" will exist when writing +// to a read cgroup filesystem and do not exist prior when running in the tests. +// this is set to a non 0 value in the test code +var defaultFilePerm = os.FileMode(0) + +type Process struct { + // Subsystem is the name of the subsystem that the process is in + Subsystem Name + // Pid is the process id of the process + Pid int + // Path is the full path of the subsystem and location that the process is in + Path string +} + +// Cgroup handles interactions with the individual groups to perform +// actions on them as them main interface to this cgroup package +type Cgroup interface { + // New creates a new cgroup under the calling cgroup + New(string, *specs.LinuxResources) (Cgroup, error) + // Add adds a process to the cgroup + Add(Process) error + // Delete removes the cgroup as a whole + Delete() error + // MoveTo moves all the processes under the calling cgroup to the provided one + // subsystems are moved one at a time + MoveTo(Cgroup) error + // Stat returns the stats for all subsystems in the cgroup + Stat(...ErrorHandler) (*Stats, error) + // Update updates all the subsystems with the provided resource changes + Update(resources *specs.LinuxResources) error + // Processes returns all the processes in a select subsystem for the cgroup + Processes(Name, bool) ([]Process, error) + // Freeze freezes or pauses all processes inside the cgroup + Freeze() error + // Thaw thaw or resumes all processes inside the cgroup + Thaw() error + // OOMEventFD returns the memory subsystem's event fd for OOM events + OOMEventFD() (uintptr, error) + // State returns the cgroups current state + State() State + // Subsystems returns all the subsystems in the cgroup + Subsystems() []Subsystem +} diff --git a/vendor/github.com/containerd/cgroups/cpu.go b/vendor/github.com/containerd/cgroups/cpu.go new file mode 100644 index 00000000..036bb8fd --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpu.go @@ -0,0 +1,120 @@ +package cgroups + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewCpu(root string) *cpuController { + return &cpuController{ + root: filepath.Join(root, string(Cpu)), + } +} + +type cpuController struct { + root string +} + +func (c *cpuController) Name() Name { + return Cpu +} + +func (c *cpuController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpuController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(c.Path(path), defaultDirPerm); err != nil { + return err + } + if cpu := resources.CPU; cpu != nil { + for _, t := range []struct { + name string + ivalue *int64 + uvalue *uint64 + }{ + { + name: "rt_period_us", + uvalue: cpu.RealtimePeriod, + }, + { + name: "rt_runtime_us", + ivalue: cpu.RealtimeRuntime, + }, + { + name: "shares", + uvalue: cpu.Shares, + }, + { + name: "cfs_period_us", + uvalue: cpu.Period, + }, + { + name: "cfs_quota_us", + ivalue: cpu.Quota, + }, + } { + var value []byte + if t.uvalue != nil { + value = []byte(strconv.FormatUint(*t.uvalue, 10)) + } else if t.ivalue != nil { + value = []byte(strconv.FormatInt(*t.ivalue, 10)) + } + if value != nil { + if err := ioutil.WriteFile( + filepath.Join(c.Path(path), fmt.Sprintf("cpu.%s", t.name)), + value, + defaultFilePerm, + ); err != nil { + return err + } + } + } + } + return nil +} + +func (c *cpuController) Update(path string, resources *specs.LinuxResources) error { + return c.Create(path, resources) +} + +func (c *cpuController) Stat(path string, stats *Stats) error { + f, err := os.Open(filepath.Join(c.Path(path), "cpu.stat")) + if err != nil { + return err + } + defer f.Close() + // get or create the cpu field because cpuacct can also set values on this struct + stats.cpuMu.Lock() + cpu := stats.Cpu + if cpu == nil { + cpu = &CpuStat{} + stats.Cpu = cpu + } + stats.cpuMu.Unlock() + sc := bufio.NewScanner(f) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + key, v, err := parseKV(sc.Text()) + if err != nil { + return err + } + switch key { + case "nr_periods": + cpu.Throttling.Periods = v + case "nr_throttled": + cpu.Throttling.ThrottledPeriods = v + case "throttled_time": + cpu.Throttling.ThrottledTime = v + } + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/cpuacct.go b/vendor/github.com/containerd/cgroups/cpuacct.go new file mode 100644 index 00000000..32923488 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpuacct.go @@ -0,0 +1,112 @@ +package cgroups + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +const nanosecondsInSecond = 1000000000 + +var clockTicks = getClockTicks() + +func NewCpuacct(root string) *cpuacctController { + return &cpuacctController{ + root: filepath.Join(root, string(Cpuacct)), + } +} + +type cpuacctController struct { + root string +} + +func (c *cpuacctController) Name() Name { + return Cpuacct +} + +func (c *cpuacctController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpuacctController) Stat(path string, stats *Stats) error { + user, kernel, err := c.getUsage(path) + if err != nil { + return err + } + total, err := readUint(filepath.Join(c.Path(path), "cpuacct.usage")) + if err != nil { + return err + } + percpu, err := c.percpuUsage(path) + if err != nil { + return err + } + stats.cpuMu.Lock() + cpu := stats.Cpu + if cpu == nil { + cpu = &CpuStat{} + stats.Cpu = cpu + } + stats.cpuMu.Unlock() + cpu.Usage.Total = total + cpu.Usage.User = user + cpu.Usage.Kernel = kernel + cpu.Usage.PerCpu = percpu + return nil +} + +func (c *cpuacctController) percpuUsage(path string) ([]uint64, error) { + var usage []uint64 + data, err := ioutil.ReadFile(filepath.Join(c.Path(path), "cpuacct.usage_percpu")) + if err != nil { + return nil, err + } + for _, v := range strings.Fields(string(data)) { + u, err := strconv.ParseUint(v, 10, 64) + if err != nil { + return nil, err + } + usage = append(usage, u) + } + return usage, nil +} + +func (c *cpuacctController) getUsage(path string) (user uint64, kernel uint64, err error) { + statPath := filepath.Join(c.Path(path), "cpuacct.stat") + data, err := ioutil.ReadFile(statPath) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("%q is expected to have 4 fields", statPath) + } + for _, t := range []struct { + index int + name string + value *uint64 + }{ + { + index: 0, + name: "user", + value: &user, + }, + { + index: 2, + name: "system", + value: &kernel, + }, + } { + if fields[t.index] != t.name { + return 0, 0, fmt.Errorf("expected field %q but found %q in %q", t.name, fields[t.index], statPath) + } + v, err := strconv.ParseUint(fields[t.index+1], 10, 64) + if err != nil { + return 0, 0, err + } + *t.value = v + } + return (user * nanosecondsInSecond) / clockTicks, (kernel * nanosecondsInSecond) / clockTicks, nil +} diff --git a/vendor/github.com/containerd/cgroups/cpuset.go b/vendor/github.com/containerd/cgroups/cpuset.go new file mode 100644 index 00000000..f0f95f56 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpuset.go @@ -0,0 +1,139 @@ +package cgroups + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewCputset(root string) *cpusetController { + return &cpusetController{ + root: filepath.Join(root, string(Cpuset)), + } +} + +type cpusetController struct { + root string +} + +func (c *cpusetController) Name() Name { + return Cpuset +} + +func (c *cpusetController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpusetController) Create(path string, resources *specs.LinuxResources) error { + if err := c.ensureParent(c.Path(path), c.root); err != nil { + return err + } + if err := os.MkdirAll(c.Path(path), defaultDirPerm); err != nil { + return err + } + if err := c.copyIfNeeded(c.Path(path), filepath.Dir(c.Path(path))); err != nil { + return err + } + if resources.CPU != nil { + for _, t := range []struct { + name string + value *string + }{ + { + name: "cpus", + value: &resources.CPU.Cpus, + }, + { + name: "mems", + value: &resources.CPU.Mems, + }, + } { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(c.Path(path), fmt.Sprintf("cpuset.%s", t.name)), + []byte(*t.value), + defaultFilePerm, + ); err != nil { + return err + } + } + } + } + return nil +} + +func (c *cpusetController) getValues(path string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(path, "cpuset.cpus")); err != nil && !os.IsNotExist(err) { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(path, "cpuset.mems")); err != nil && !os.IsNotExist(err) { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (c *cpusetController) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if _, err := filepath.Rel(root, parent); err != nil { + return nil + } + // Avoid infinite recursion. + if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if cleanPath(parent) != root { + if err := c.ensureParent(parent, root); err != nil { + return err + } + } + if err := os.MkdirAll(current, defaultDirPerm); err != nil { + return err + } + return c.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (c *cpusetController) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + if currentCpus, currentMems, err = c.getValues(current); err != nil { + return err + } + if parentCpus, parentMems, err = c.getValues(parent); err != nil { + return err + } + if isEmpty(currentCpus) { + if err := ioutil.WriteFile( + filepath.Join(current, "cpuset.cpus"), + parentCpus, + defaultFilePerm, + ); err != nil { + return err + } + } + if isEmpty(currentMems) { + if err := ioutil.WriteFile( + filepath.Join(current, "cpuset.mems"), + parentMems, + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} diff --git a/vendor/github.com/containerd/cgroups/devices.go b/vendor/github.com/containerd/cgroups/devices.go new file mode 100644 index 00000000..f0dca5c5 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/devices.go @@ -0,0 +1,74 @@ +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + allowDeviceFile = "devices.allow" + denyDeviceFile = "devices.deny" + wildcard = -1 +) + +func NewDevices(root string) *devicesController { + return &devicesController{ + root: filepath.Join(root, string(Devices)), + } +} + +type devicesController struct { + root string +} + +func (d *devicesController) Name() Name { + return Devices +} + +func (d *devicesController) Path(path string) string { + return filepath.Join(d.root, path) +} + +func (d *devicesController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(d.Path(path), defaultDirPerm); err != nil { + return err + } + for _, device := range resources.Devices { + file := denyDeviceFile + if device.Allow { + file = allowDeviceFile + } + if err := ioutil.WriteFile( + filepath.Join(d.Path(path), file), + []byte(deviceString(device)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func (d *devicesController) Update(path string, resources *specs.LinuxResources) error { + return d.Create(path, resources) +} + +func deviceString(device specs.LinuxDeviceCgroup) string { + return fmt.Sprintf("%c %s:%s %s", + &device.Type, + deviceNumber(device.Major), + deviceNumber(device.Minor), + &device.Access, + ) +} + +func deviceNumber(number *int64) string { + if number == nil || *number == wildcard { + return "*" + } + return fmt.Sprint(*number) +} diff --git a/vendor/github.com/containerd/cgroups/errors.go b/vendor/github.com/containerd/cgroups/errors.go new file mode 100644 index 00000000..d18b4b1d --- /dev/null +++ b/vendor/github.com/containerd/cgroups/errors.go @@ -0,0 +1,31 @@ +package cgroups + +import ( + "errors" + "os" +) + +var ( + ErrInvalidPid = errors.New("cgroups: pid must be greater than 0") + ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist") + ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed") + ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup not supported on this system") + ErrMemoryNotSupported = errors.New("cgroups: memory cgroup not supported on this system") + ErrCgroupDeleted = errors.New("cgroups: cgroup deleted") + ErrNoCgroupMountDestination = errors.New("cgroups: cannot found cgroup mount destination") +) + +// ErrorHandler is a function that handles and acts on errors +type ErrorHandler func(err error) error + +// IgnoreNotExist ignores any errors that are for not existing files +func IgnoreNotExist(err error) error { + if os.IsNotExist(err) { + return nil + } + return err +} + +func errPassthrough(err error) error { + return err +} diff --git a/vendor/github.com/containerd/cgroups/freezer.go b/vendor/github.com/containerd/cgroups/freezer.go new file mode 100644 index 00000000..f53430b5 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/freezer.go @@ -0,0 +1,69 @@ +package cgroups + +import ( + "io/ioutil" + "path/filepath" + "strings" + "time" +) + +func NewFreezer(root string) *freezerController { + return &freezerController{ + root: filepath.Join(root, string(Freezer)), + } +} + +type freezerController struct { + root string +} + +func (f *freezerController) Name() Name { + return Freezer +} + +func (f *freezerController) Path(path string) string { + return filepath.Join(f.root, path) +} + +func (f *freezerController) Freeze(path string) error { + if err := f.changeState(path, Frozen); err != nil { + return err + } + return f.waitState(path, Frozen) +} + +func (f *freezerController) Thaw(path string) error { + if err := f.changeState(path, Thawed); err != nil { + return err + } + return f.waitState(path, Thawed) +} + +func (f *freezerController) changeState(path string, state State) error { + return ioutil.WriteFile( + filepath.Join(f.root, path, "freezer.state"), + []byte(strings.ToUpper(string(state))), + defaultFilePerm, + ) +} + +func (f *freezerController) state(path string) (State, error) { + current, err := ioutil.ReadFile(filepath.Join(f.root, path, "freezer.state")) + if err != nil { + return "", err + } + return State(strings.ToLower(strings.TrimSpace(string(current)))), nil +} + +func (f *freezerController) waitState(path string, state State) error { + for { + current, err := f.state(path) + if err != nil { + return err + } + if current == state { + return nil + } + time.Sleep(1 * time.Millisecond) + } +} diff --git a/vendor/github.com/containerd/cgroups/hierarchy.go b/vendor/github.com/containerd/cgroups/hierarchy.go new file mode 100644 index 00000000..b61660d4 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/hierarchy.go @@ -0,0 +1,4 @@ +package cgroups + +// Hierarchy enableds both unified and split hierarchy for cgroups +type Hierarchy func() ([]Subsystem, error) diff --git a/vendor/github.com/containerd/cgroups/hugetlb.go b/vendor/github.com/containerd/cgroups/hugetlb.go new file mode 100644 index 00000000..650bb069 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/hugetlb.go @@ -0,0 +1,92 @@ +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewHugetlb(root string) (*hugetlbController, error) { + sizes, err := hugePageSizes() + if err != nil { + return nil, err + } + + return &hugetlbController{ + root: filepath.Join(root, string(Hugetlb)), + sizes: sizes, + }, nil +} + +type hugetlbController struct { + root string + sizes []string +} + +func (h *hugetlbController) Name() Name { + return Hugetlb +} + +func (h *hugetlbController) Path(path string) string { + return filepath.Join(h.root, path) +} + +func (h *hugetlbController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(h.Path(path), defaultDirPerm); err != nil { + return err + } + for _, limit := range resources.HugepageLimits { + if err := ioutil.WriteFile( + filepath.Join(h.Path(path), strings.Join([]string{"hugetlb", limit.Pagesize, "limit_in_bytes"}, ".")), + []byte(strconv.FormatUint(limit.Limit, 10)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func (h *hugetlbController) Stat(path string, stats *Stats) error { + stats.Hugetlb = make(map[string]HugetlbStat) + for _, size := range h.sizes { + s, err := h.readSizeStat(path, size) + if err != nil { + return err + } + stats.Hugetlb[size] = s + } + return nil +} + +func (h *hugetlbController) readSizeStat(path, size string) (HugetlbStat, error) { + var s HugetlbStat + for _, t := range []struct { + name string + value *uint64 + }{ + { + name: "usage_in_bytes", + value: &s.Usage, + }, + { + name: "max_usage_in_bytes", + value: &s.Max, + }, + { + name: "failcnt", + value: &s.Failcnt, + }, + } { + v, err := readUint(filepath.Join(h.Path(path), strings.Join([]string{"hugetlb", size, t.name}, "."))) + if err != nil { + return s, err + } + *t.value = v + } + return s, nil +} diff --git a/vendor/github.com/containerd/cgroups/memory.go b/vendor/github.com/containerd/cgroups/memory.go new file mode 100644 index 00000000..9e2ddf2e --- /dev/null +++ b/vendor/github.com/containerd/cgroups/memory.go @@ -0,0 +1,304 @@ +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + "golang.org/x/sys/unix" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewMemory(root string) *memoryController { + return &memoryController{ + root: filepath.Join(root, string(Memory)), + } +} + +type memoryController struct { + root string +} + +func (m *memoryController) Name() Name { + return Memory +} + +func (m *memoryController) Path(path string) string { + return filepath.Join(m.root, path) +} + +func (m *memoryController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(m.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Memory == nil { + return nil + } + if resources.Memory.Kernel != nil { + // Check if kernel memory is enabled + // We have to limit the kernel memory here as it won't be accounted at all + // until a limit is set on the cgroup and limit cannot be set once the + // cgroup has children, or if there are already tasks in the cgroup. + for _, i := range []int64{1, -1} { + if err := ioutil.WriteFile( + filepath.Join(m.Path(path), "memory.kmem.limit_in_bytes"), + []byte(strconv.FormatInt(i, 10)), + defaultFilePerm, + ); err != nil { + return checkEBUSY(err) + } + } + } + return m.set(path, getMemorySettings(resources)) +} + +func (m *memoryController) Update(path string, resources *specs.LinuxResources) error { + if resources.Memory == nil { + return nil + } + g := func(v *int64) bool { + return v != nil && *v > 0 + } + settings := getMemorySettings(resources) + if g(resources.Memory.Limit) && g(resources.Memory.Swap) { + // if the updated swap value is larger than the current memory limit set the swap changes first + // then set the memory limit as swap must always be larger than the current limit + current, err := readUint(filepath.Join(m.Path(path), "memory.limit_in_bytes")) + if err != nil { + return err + } + if current < uint64(*resources.Memory.Swap) { + settings[0], settings[1] = settings[1], settings[0] + } + } + return m.set(path, settings) +} + +func (m *memoryController) Stat(path string, stats *Stats) error { + f, err := os.Open(filepath.Join(m.Path(path), "memory.stat")) + if err != nil { + return err + } + defer f.Close() + stats.Memory = &MemoryStat{} + if err := m.parseStats(f, stats.Memory); err != nil { + return err + } + for _, t := range []struct { + module string + entry *MemoryEntry + }{ + { + module: "", + entry: &stats.Memory.Usage, + }, + { + module: "memsw", + entry: &stats.Memory.Swap, + }, + { + module: "kmem", + entry: &stats.Memory.Kernel, + }, + { + module: "kmem.tcp", + entry: &stats.Memory.KernelTCP, + }, + } { + for _, tt := range []struct { + name string + value *uint64 + }{ + { + name: "usage_in_bytes", + value: &t.entry.Usage, + }, + { + name: "max_usage_in_bytes", + value: &t.entry.Max, + }, + { + name: "failcnt", + value: &t.entry.Failcnt, + }, + { + name: "limit_in_bytes", + value: &t.entry.Limit, + }, + } { + parts := []string{"memory"} + if t.module != "" { + parts = append(parts, t.module) + } + parts = append(parts, tt.name) + v, err := readUint(filepath.Join(m.Path(path), strings.Join(parts, "."))) + if err != nil { + return err + } + *tt.value = v + } + } + return nil +} + +func (m *memoryController) OOMEventFD(path string) (uintptr, error) { + root := m.Path(path) + f, err := os.Open(filepath.Join(root, "memory.oom_control")) + if err != nil { + return 0, err + } + defer f.Close() + fd, _, serr := unix.RawSyscall(unix.SYS_EVENTFD2, 0, unix.FD_CLOEXEC, 0) + if serr != 0 { + return 0, serr + } + if err := writeEventFD(root, f.Fd(), fd); err != nil { + unix.Close(int(fd)) + return 0, err + } + return fd, nil +} + +func writeEventFD(root string, cfd, efd uintptr) error { + f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0) + if err != nil { + return err + } + _, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd)) + f.Close() + return err +} + +func (m *memoryController) parseStats(r io.Reader, stat *MemoryStat) error { + var ( + raw = make(map[string]uint64) + sc = bufio.NewScanner(r) + line int + ) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + key, v, err := parseKV(sc.Text()) + if err != nil { + return fmt.Errorf("%d: %v", line, err) + } + raw[key] = v + line++ + } + stat.Cache = raw["cache"] + stat.RSS = raw["rss"] + stat.RSSHuge = raw["rss_huge"] + stat.MappedFile = raw["mapped_file"] + stat.Dirty = raw["dirty"] + stat.Writeback = raw["writeback"] + stat.PgPgIn = raw["pgpgin"] + stat.PgPgOut = raw["pgpgout"] + stat.PgFault = raw["pgfault"] + stat.PgMajFault = raw["pgmajfault"] + stat.InactiveAnon = raw["inactive_anon"] + stat.ActiveAnon = raw["active_anon"] + stat.InactiveFile = raw["inactive_file"] + stat.ActiveFile = raw["active_file"] + stat.Unevictable = raw["unevictable"] + stat.HierarchicalMemoryLimit = raw["hierarchical_memory_limit"] + stat.HierarchicalSwapLimit = raw["hierarchical_memsw_limit"] + stat.TotalCache = raw["total_cache"] + stat.TotalRSS = raw["total_rss"] + stat.TotalRSSHuge = raw["total_rss_huge"] + stat.TotalMappedFile = raw["total_mapped_file"] + stat.TotalDirty = raw["total_dirty"] + stat.TotalWriteback = raw["total_writeback"] + stat.TotalPgPgIn = raw["total_pgpgin"] + stat.TotalPgPgOut = raw["total_pgpgout"] + stat.TotalPgFault = raw["total_pgfault"] + stat.TotalPgMajFault = raw["total_pgmajfault"] + stat.TotalInactiveAnon = raw["total_inactive_anon"] + stat.TotalActiveAnon = raw["total_active_anon"] + stat.TotalInactiveFile = raw["total_inactive_file"] + stat.TotalActiveFile = raw["total_active_file"] + stat.TotalUnevictable = raw["total_unevictable"] + return nil +} + +func (m *memoryController) set(path string, settings []memorySettings) error { + for _, t := range settings { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(m.Path(path), fmt.Sprintf("memory.%s", t.name)), + []byte(strconv.FormatInt(*t.value, 10)), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +type memorySettings struct { + name string + value *int64 +} + +func getMemorySettings(resources *specs.LinuxResources) []memorySettings { + mem := resources.Memory + var swappiness *int64 + if mem.Swappiness != nil { + v := int64(*mem.Swappiness) + swappiness = &v + } + return []memorySettings{ + { + name: "limit_in_bytes", + value: mem.Limit, + }, + { + name: "memsw.limit_in_bytes", + value: mem.Swap, + }, + { + name: "kmem.limit_in_bytes", + value: mem.Kernel, + }, + { + name: "kmem.tcp.limit_in_bytes", + value: mem.KernelTCP, + }, + { + name: "oom_control", + value: getOomControlValue(mem), + }, + { + name: "swappiness", + value: swappiness, + }, + } +} + +func checkEBUSY(err error) error { + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == unix.EBUSY { + return fmt.Errorf( + "failed to set memory.kmem.limit_in_bytes, because either tasks have already joined this cgroup or it has children") + } + } + } + return err +} + +func getOomControlValue(mem *specs.LinuxMemory) *int64 { + if mem.DisableOOMKiller != nil && *mem.DisableOOMKiller { + i := int64(1) + return &i + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/named.go b/vendor/github.com/containerd/cgroups/named.go new file mode 100644 index 00000000..f0fbf018 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/named.go @@ -0,0 +1,23 @@ +package cgroups + +import "path/filepath" + +func NewNamed(root string, name Name) *namedController { + return &namedController{ + root: root, + name: name, + } +} + +type namedController struct { + root string + name Name +} + +func (n *namedController) Name() Name { + return n.name +} + +func (n *namedController) Path(path string) string { + return filepath.Join(n.root, string(n.name), path) +} diff --git a/vendor/github.com/containerd/cgroups/net_cls.go b/vendor/github.com/containerd/cgroups/net_cls.go new file mode 100644 index 00000000..1558444b --- /dev/null +++ b/vendor/github.com/containerd/cgroups/net_cls.go @@ -0,0 +1,42 @@ +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewNetCls(root string) *netclsController { + return &netclsController{ + root: filepath.Join(root, string(NetCLS)), + } +} + +type netclsController struct { + root string +} + +func (n *netclsController) Name() Name { + return NetCLS +} + +func (n *netclsController) Path(path string) string { + return filepath.Join(n.root, path) +} + +func (n *netclsController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(n.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Network != nil && resources.Network.ClassID != nil && *resources.Network.ClassID > 0 { + return ioutil.WriteFile( + filepath.Join(n.Path(path), "net_cls.classid"), + []byte(strconv.FormatUint(uint64(*resources.Network.ClassID), 10)), + defaultFilePerm, + ) + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/net_prio.go b/vendor/github.com/containerd/cgroups/net_prio.go new file mode 100644 index 00000000..0959b8e7 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/net_prio.go @@ -0,0 +1,50 @@ +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewNetPrio(root string) *netprioController { + return &netprioController{ + root: filepath.Join(root, string(NetPrio)), + } +} + +type netprioController struct { + root string +} + +func (n *netprioController) Name() Name { + return NetPrio +} + +func (n *netprioController) Path(path string) string { + return filepath.Join(n.root, path) +} + +func (n *netprioController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(n.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Network != nil { + for _, prio := range resources.Network.Priorities { + if err := ioutil.WriteFile( + filepath.Join(n.Path(path), "net_prio_ifpriomap"), + formatPrio(prio.Name, prio.Priority), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +func formatPrio(name string, prio uint32) []byte { + return []byte(fmt.Sprintf("%s %d", name, prio)) +} diff --git a/vendor/github.com/containerd/cgroups/paths.go b/vendor/github.com/containerd/cgroups/paths.go new file mode 100644 index 00000000..1afc24b6 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/paths.go @@ -0,0 +1,88 @@ +package cgroups + +import ( + "fmt" + "path/filepath" + + "github.com/pkg/errors" +) + +type Path func(subsystem Name) (string, error) + +func RootPath(subsysem Name) (string, error) { + return "/", nil +} + +// StaticPath returns a static path to use for all cgroups +func StaticPath(path string) Path { + return func(_ Name) (string, error) { + return path, nil + } +} + +// NestedPath will nest the cgroups based on the calling processes cgroup +// placing its child processes inside its own path +func NestedPath(suffix string) Path { + paths, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return errorPath(err) + } + return existingPath(paths, suffix) +} + +// PidPath will return the correct cgroup paths for an existing process running inside a cgroup +// This is commonly used for the Load function to restore an existing container +func PidPath(pid int) Path { + p := fmt.Sprintf("/proc/%d/cgroup", pid) + paths, err := parseCgroupFile(p) + if err != nil { + return errorPath(errors.Wrapf(err, "parse cgroup file %s", p)) + } + return existingPath(paths, "") +} + +func existingPath(paths map[string]string, suffix string) Path { + // localize the paths based on the root mount dest for nested cgroups + for n, p := range paths { + dest, err := getCgroupDestination(string(n)) + if err != nil { + return errorPath(err) + } + rel, err := filepath.Rel(dest, p) + if err != nil { + return errorPath(err) + } + if rel == "." { + rel = dest + } + paths[n] = filepath.Join("/", rel) + } + return func(name Name) (string, error) { + root, ok := paths[string(name)] + if !ok { + if root, ok = paths[fmt.Sprintf("name=%s", name)]; !ok { + return "", fmt.Errorf("unable to find %q in controller set", name) + } + } + if suffix != "" { + return filepath.Join(root, suffix), nil + } + return root, nil + } +} + +func subPath(path Path, subName string) Path { + return func(name Name) (string, error) { + p, err := path(name) + if err != nil { + return "", err + } + return filepath.Join(p, subName), nil + } +} + +func errorPath(err error) Path { + return func(_ Name) (string, error) { + return "", err + } +} diff --git a/vendor/github.com/containerd/cgroups/perf_event.go b/vendor/github.com/containerd/cgroups/perf_event.go new file mode 100644 index 00000000..0fa43ec1 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/perf_event.go @@ -0,0 +1,21 @@ +package cgroups + +import "path/filepath" + +func NewPerfEvent(root string) *PerfEventController { + return &PerfEventController{ + root: filepath.Join(root, string(PerfEvent)), + } +} + +type PerfEventController struct { + root string +} + +func (p *PerfEventController) Name() Name { + return PerfEvent +} + +func (p *PerfEventController) Path(path string) string { + return filepath.Join(p.root, path) +} diff --git a/vendor/github.com/containerd/cgroups/pids.go b/vendor/github.com/containerd/cgroups/pids.go new file mode 100644 index 00000000..bdcc10a9 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/pids.go @@ -0,0 +1,69 @@ +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewPids(root string) *pidsController { + return &pidsController{ + root: filepath.Join(root, string(Pids)), + } +} + +type pidsController struct { + root string +} + +func (p *pidsController) Name() Name { + return Pids +} + +func (p *pidsController) Path(path string) string { + return filepath.Join(p.root, path) +} + +func (p *pidsController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(p.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Pids != nil && resources.Pids.Limit > 0 { + return ioutil.WriteFile( + filepath.Join(p.Path(path), "pids.max"), + []byte(strconv.FormatInt(resources.Pids.Limit, 10)), + defaultFilePerm, + ) + } + return nil +} + +func (p *pidsController) Update(path string, resources *specs.LinuxResources) error { + return p.Create(path, resources) +} + +func (p *pidsController) Stat(path string, stats *Stats) error { + current, err := readUint(filepath.Join(p.Path(path), "pids.current")) + if err != nil { + return err + } + var max uint64 + maxData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "pids.max")) + if err != nil { + return err + } + if maxS := strings.TrimSpace(string(maxData)); maxS != "max" { + if max, err = parseUint(maxS, 10, 64); err != nil { + return err + } + } + stats.Pids = &PidsStat{ + Current: current, + Limit: max, + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/state.go b/vendor/github.com/containerd/cgroups/state.go new file mode 100644 index 00000000..f1d7b7b6 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/state.go @@ -0,0 +1,12 @@ +package cgroups + +// State is a type that represents the state of the current cgroup +type State string + +const ( + Unknown State = "" + Thawed State = "thawed" + Frozen State = "frozen" + Freezing State = "freezing" + Deleted State = "deleted" +) diff --git a/vendor/github.com/containerd/cgroups/stats.go b/vendor/github.com/containerd/cgroups/stats.go new file mode 100644 index 00000000..47fbfa96 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/stats.go @@ -0,0 +1,109 @@ +package cgroups + +import "sync" + +type Stats struct { + cpuMu sync.Mutex + + Hugetlb map[string]HugetlbStat + Pids *PidsStat + Cpu *CpuStat + Memory *MemoryStat + Blkio *BlkioStat +} + +type HugetlbStat struct { + Usage uint64 + Max uint64 + Failcnt uint64 +} + +type PidsStat struct { + Current uint64 + Limit uint64 +} + +type CpuStat struct { + Usage CpuUsage + Throttling Throttle +} + +type CpuUsage struct { + // Units: nanoseconds. + Total uint64 + PerCpu []uint64 + Kernel uint64 + User uint64 +} + +type Throttle struct { + Periods uint64 + ThrottledPeriods uint64 + ThrottledTime uint64 +} + +type MemoryStat struct { + Cache uint64 + RSS uint64 + RSSHuge uint64 + MappedFile uint64 + Dirty uint64 + Writeback uint64 + PgPgIn uint64 + PgPgOut uint64 + PgFault uint64 + PgMajFault uint64 + InactiveAnon uint64 + ActiveAnon uint64 + InactiveFile uint64 + ActiveFile uint64 + Unevictable uint64 + HierarchicalMemoryLimit uint64 + HierarchicalSwapLimit uint64 + TotalCache uint64 + TotalRSS uint64 + TotalRSSHuge uint64 + TotalMappedFile uint64 + TotalDirty uint64 + TotalWriteback uint64 + TotalPgPgIn uint64 + TotalPgPgOut uint64 + TotalPgFault uint64 + TotalPgMajFault uint64 + TotalInactiveAnon uint64 + TotalActiveAnon uint64 + TotalInactiveFile uint64 + TotalActiveFile uint64 + TotalUnevictable uint64 + + Usage MemoryEntry + Swap MemoryEntry + Kernel MemoryEntry + KernelTCP MemoryEntry +} + +type MemoryEntry struct { + Limit uint64 + Usage uint64 + Max uint64 + Failcnt uint64 +} + +type BlkioStat struct { + IoServiceBytesRecursive []BlkioEntry + IoServicedRecursive []BlkioEntry + IoQueuedRecursive []BlkioEntry + IoServiceTimeRecursive []BlkioEntry + IoWaitTimeRecursive []BlkioEntry + IoMergedRecursive []BlkioEntry + IoTimeRecursive []BlkioEntry + SectorsRecursive []BlkioEntry +} + +type BlkioEntry struct { + Op string + Device string + Major uint64 + Minor uint64 + Value uint64 +} diff --git a/vendor/github.com/containerd/cgroups/subsystem.go b/vendor/github.com/containerd/cgroups/subsystem.go new file mode 100644 index 00000000..aab403b8 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/subsystem.go @@ -0,0 +1,94 @@ +package cgroups + +import ( + "fmt" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// Name is a typed name for a cgroup subsystem +type Name string + +const ( + Devices Name = "devices" + Hugetlb Name = "hugetlb" + Freezer Name = "freezer" + Pids Name = "pids" + NetCLS Name = "net_cls" + NetPrio Name = "net_prio" + PerfEvent Name = "perf_event" + Cpuset Name = "cpuset" + Cpu Name = "cpu" + Cpuacct Name = "cpuacct" + Memory Name = "memory" + Blkio Name = "blkio" +) + +// Subsystems returns a complete list of the default cgroups +// avaliable on most linux systems +func Subsystems() []Name { + n := []Name{ + Hugetlb, + Freezer, + Pids, + NetCLS, + NetPrio, + PerfEvent, + Cpuset, + Cpu, + Cpuacct, + Memory, + Blkio, + } + if !isUserNS { + n = append(n, Devices) + } + return n +} + +type Subsystem interface { + Name() Name +} + +type pather interface { + Subsystem + Path(path string) string +} + +type creator interface { + Subsystem + Create(path string, resources *specs.LinuxResources) error +} + +type deleter interface { + Subsystem + Delete(path string) error +} + +type stater interface { + Subsystem + Stat(path string, stats *Stats) error +} + +type updater interface { + Subsystem + Update(path string, resources *specs.LinuxResources) error +} + +// SingleSubsystem returns a single cgroup subsystem within the base Hierarchy +func SingleSubsystem(baseHierarchy Hierarchy, subsystem Name) Hierarchy { + return func() ([]Subsystem, error) { + subsystems, err := baseHierarchy() + if err != nil { + return nil, err + } + for _, s := range subsystems { + if s.Name() == subsystem { + return []Subsystem{ + s, + }, nil + } + } + return nil, fmt.Errorf("unable to find subsystem %s", subsystem) + } +} diff --git a/vendor/github.com/containerd/cgroups/systemd.go b/vendor/github.com/containerd/cgroups/systemd.go new file mode 100644 index 00000000..5e052a82 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/systemd.go @@ -0,0 +1,101 @@ +package cgroups + +import ( + "fmt" + "path/filepath" + "strings" + "sync" + + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/godbus/dbus" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + SystemdDbus Name = "systemd" + defaultSlice = "system.slice" +) + +func Systemd() ([]Subsystem, error) { + root, err := v1MountPoint() + if err != nil { + return nil, err + } + defaultSubsystems, err := defaults(root) + if err != nil { + return nil, err + } + s, err := NewSystemd(root) + if err != nil { + return nil, err + } + // make sure the systemd controller is added first + return append([]Subsystem{s}, defaultSubsystems...), nil +} + +func Slice(slice, name string) Path { + if slice == "" { + slice = defaultSlice + } + return func(subsystem Name) (string, error) { + return filepath.Join(slice, unitName(name)), nil + } +} + +func NewSystemd(root string) (*SystemdController, error) { + conn, err := systemdDbus.New() + if err != nil { + return nil, err + } + return &SystemdController{ + root: root, + conn: conn, + }, nil +} + +type SystemdController struct { + mu sync.Mutex + conn *systemdDbus.Conn + root string +} + +func (s *SystemdController) Name() Name { + return SystemdDbus +} + +func (s *SystemdController) Create(path string, resources *specs.LinuxResources) error { + slice, name := splitName(path) + properties := []systemdDbus.Property{ + systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)), + systemdDbus.PropWants(slice), + newProperty("DefaultDependencies", false), + newProperty("Delegate", true), + newProperty("MemoryAccounting", true), + newProperty("CPUAccounting", true), + newProperty("BlockIOAccounting", true), + } + _, err := s.conn.StartTransientUnit(name, "replace", properties, nil) + return err +} + +func (s *SystemdController) Delete(path string) error { + _, name := splitName(path) + _, err := s.conn.StopUnit(name, "replace", nil) + return err +} + +func newProperty(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func unitName(name string) string { + return fmt.Sprintf("%s.slice", name) +} + +func splitName(path string) (slice string, unit string) { + slice, unit = filepath.Split(path) + return strings.TrimSuffix(slice, "/"), unit +} diff --git a/vendor/github.com/containerd/cgroups/ticks.go b/vendor/github.com/containerd/cgroups/ticks.go new file mode 100644 index 00000000..f471c5ae --- /dev/null +++ b/vendor/github.com/containerd/cgroups/ticks.go @@ -0,0 +1,10 @@ +package cgroups + +func getClockTicks() uint64 { + // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and + // on Linux it's a constant which is safe to be hard coded, + // so we can avoid using cgo here. + // See https://github.com/containerd/cgroups/pull/12 for + // more details. + return 100 +} diff --git a/vendor/github.com/containerd/cgroups/utils.go b/vendor/github.com/containerd/cgroups/utils.go new file mode 100644 index 00000000..d5c7230a --- /dev/null +++ b/vendor/github.com/containerd/cgroups/utils.go @@ -0,0 +1,280 @@ +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + units "github.com/docker/go-units" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +var isUserNS = runningInUserNS() + +// runningInUserNS detects whether we are currently running in a user namespace. +// Copied from github.com/lxc/lxd/shared/util.go +func runningInUserNS() bool { + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return false + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() + if err != nil { + return false + } + + line := string(l) + var a, b, c int64 + fmt.Sscanf(line, "%d %d %d", &a, &b, &c) + /* + * We assume we are in the initial user namespace if we have a full + * range - 4294967295 uids starting at uid 0. + */ + if a == 0 && b == 0 && c == 4294967295 { + return false + } + return true +} + +// defaults returns all known groups +func defaults(root string) ([]Subsystem, error) { + h, err := NewHugetlb(root) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + s := []Subsystem{ + NewNamed(root, "systemd"), + NewFreezer(root), + NewPids(root), + NewNetCls(root), + NewNetPrio(root), + NewPerfEvent(root), + NewCputset(root), + NewCpu(root), + NewCpuacct(root), + NewMemory(root), + NewBlkio(root), + } + // only add the devices cgroup if we are not in a user namespace + // because modifications are not allowed + if !isUserNS { + s = append(s, NewDevices(root)) + } + // add the hugetlb cgroup if error wasn't due to missing hugetlb + // cgroup support on the host + if err == nil { + s = append(s, h) + } + return s, nil +} + +// remove will remove a cgroup path handling EAGAIN and EBUSY errors and +// retrying the remove after a exp timeout +func remove(path string) error { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + if err := os.RemoveAll(path); err == nil { + return nil + } + } + return fmt.Errorf("cgroups: unable to remove path %q", path) +} + +// readPids will read all the pids in a cgroup by the provided path +func readPids(path string, subsystem Name) ([]Process, error) { + f, err := os.Open(filepath.Join(path, cgroupProcs)) + if err != nil { + return nil, err + } + defer f.Close() + var ( + out []Process + s = bufio.NewScanner(f) + ) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, Process{ + Pid: pid, + Subsystem: subsystem, + Path: path, + }) + } + } + return out, nil +} + +func hugePageSizes() ([]string, error) { + var ( + pageSizes []string + sizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"} + ) + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return nil, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return nil, err + } + pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)) + } + return pageSizes, nil +} + +func readUint(path string) (uint64, error) { + v, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + return parseUint(strings.TrimSpace(string(v)), 10, 64) +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + v, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && + intErr.(*strconv.NumError).Err == strconv.ErrRange && + intValue < 0 { + return 0, nil + } + return 0, err + } + return v, nil +} + +func parseKV(raw string) (string, uint64, error) { + parts := strings.Fields(raw) + switch len(parts) { + case 2: + v, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, err + } + return parts[0], v, nil + default: + return "", 0, ErrInvalidFormat + } +} + +func parseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + var ( + cgroups = make(map[string]string) + s = bufio.NewScanner(r) + ) + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: %q", text) + } + for _, subs := range strings.Split(parts[1], ",") { + if subs != "" { + cgroups[subs] = parts[2] + } + } + } + return cgroups, nil +} + +func getCgroupDestination(subsystem string) (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + fields := strings.Fields(s.Text()) + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[3], nil + } + } + } + return "", ErrNoCgroupMountDestination +} + +func pathers(subystems []Subsystem) []pather { + var out []pather + for _, s := range subystems { + if p, ok := s.(pather); ok { + out = append(out, p) + } + } + return out +} + +func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error { + if c, ok := s.(creator); ok { + p, err := path(s.Name()) + if err != nil { + return err + } + if err := c.Create(p, resources); err != nil { + return err + } + } else if c, ok := s.(pather); ok { + p, err := path(s.Name()) + if err != nil { + return err + } + // do the default create if the group does not have a custom one + if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil { + return err + } + } + return nil +} + +func cleanPath(path string) string { + if path == "" { + return "" + } + path = filepath.Clean(path) + if !filepath.IsAbs(path) { + path, _ = filepath.Rel(string(os.PathSeparator), filepath.Clean(string(os.PathSeparator)+path)) + } + return filepath.Clean(path) +} diff --git a/vendor/github.com/containerd/cgroups/v1.go b/vendor/github.com/containerd/cgroups/v1.go new file mode 100644 index 00000000..f6608f7c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/v1.go @@ -0,0 +1,65 @@ +package cgroups + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" +) + +// V1 returns all the groups in the default cgroups mountpoint in a single hierarchy +func V1() ([]Subsystem, error) { + root, err := v1MountPoint() + if err != nil { + return nil, err + } + subsystems, err := defaults(root) + if err != nil { + return nil, err + } + var enabled []Subsystem + for _, s := range pathers(subsystems) { + // check and remove the default groups that do not exist + if _, err := os.Lstat(s.Path("/")); err == nil { + enabled = append(enabled, s) + } + } + return enabled, nil +} + +// v1MountPoint returns the mount point where the cgroup +// mountpoints are mounted in a single hiearchy +func v1MountPoint() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + if err := scanner.Err(); err != nil { + return "", err + } + var ( + text = scanner.Text() + fields = strings.Split(text, " ") + // safe as mountinfo encodes mountpoints with spaces as \040. + index = strings.Index(text, " - ") + postSeparatorFields = strings.Fields(text[index+3:]) + numPostFields = len(postSeparatorFields) + ) + // this is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + if postSeparatorFields[0] == "cgroup" { + // check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + return filepath.Dir(fields[4]), nil + } + } + return "", ErrMountPointNotExist +} From f51ca87857abdd60d315554cb021275804871618 Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Tue, 29 Aug 2017 23:11:30 +0200 Subject: [PATCH 3/3] *: constify cgroups stuff Signed-off-by: Antonio Murdaca --- libkpod/config.go | 3 ++- oci/oci.go | 9 +++++++-- server/container_create.go | 11 +++++++---- server/sandbox_run.go | 2 +- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/libkpod/config.go b/libkpod/config.go index c71aa314..c3e94043 100644 --- a/libkpod/config.go +++ b/libkpod/config.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "github.com/BurntSushi/toml" + "github.com/kubernetes-incubator/cri-o/oci" "github.com/opencontainers/selinux/go-selinux" ) @@ -20,7 +21,7 @@ const ( apparmorProfileName = "crio-default" cniConfigDir = "/etc/cni/net.d/" cniBinDir = "/opt/cni/bin/" - cgroupManager = "cgroupfs" + cgroupManager = oci.CgroupfsCgroupsManager lockPath = "/run/crio.lock" containerExitsDir = "/var/run/kpod/exits" ) diff --git a/oci/oci.go b/oci/oci.go index d9f9e0b4..ef5027c5 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -30,6 +30,11 @@ const ( ContainerStateStopped = "stopped" // ContainerCreateTimeout represents the value of container creating timeout ContainerCreateTimeout = 10 * time.Second + + // CgroupfsCgroupsManager represents cgroupfs native cgroup manager + CgroupfsCgroupsManager = "cgroupfs" + // SystemdCgroupsManager represents systemd native cgroup manager + SystemdCgroupsManager = "systemd" ) // New creates a new Runtime with options provided @@ -141,7 +146,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { defer parentStartPipe.Close() var args []string - if r.cgroupManager == "systemd" { + if r.cgroupManager == SystemdCgroupsManager { args = append(args, "-s") } args = append(args, "-c", c.id) @@ -187,7 +192,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { childStartPipe.Close() // Move conmon to specified cgroup - if r.cgroupManager == "systemd" { + if r.cgroupManager == SystemdCgroupsManager { logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id)) if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil { logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) diff --git a/server/container_create.go b/server/container_create.go index 6c7c7754..8b2a940a 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -38,6 +38,10 @@ const ( seccompUnconfined = "unconfined" seccompRuntimeDefault = "runtime/default" seccompLocalhostPrefix = "localhost/" + + scopePrefix = "crio" + defaultCgroupfsParent = "/crio" + defaultSystemdParent = "system.slice" ) func addOCIBindMounts(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) ([]oci.ContainerVolume, error) { @@ -472,11 +476,10 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } var cgPath string - scopePrefix := "crio" - parent := "/crio" - useSystemd := s.config.CgroupManager == "systemd" + parent := defaultCgroupfsParent + useSystemd := s.config.CgroupManager == oci.SystemdCgroupsManager if useSystemd { - parent = "system.slice" + parent = defaultSystemdParent } if sb.CgroupParent() != "" { parent = sb.CgroupParent() diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 213e1cf9..375ff082 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -325,7 +325,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest // setup cgroup settings cgroupParent := req.GetConfig().GetLinux().CgroupParent if cgroupParent != "" { - if s.config.CgroupManager == "systemd" { + if s.config.CgroupManager == oci.SystemdCgroupsManager { cgPath, err := convertCgroupNameToSystemd(cgroupParent, false) if err != nil { return nil, err