Merge pull request #812 from runcom/fix-cgroupfs-naming

server: container_create: fix cgroupfs scopes naming
This commit is contained in:
Mrunal Patel 2017-08-29 17:52:45 -07:00 committed by GitHub
commit d634468da6
33 changed files with 3040 additions and 15 deletions

View file

@ -5,6 +5,7 @@ import (
"io/ioutil"
"github.com/BurntSushi/toml"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/opencontainers/selinux/go-selinux"
)
@ -20,7 +21,7 @@ const (
apparmorProfileName = "crio-default"
cniConfigDir = "/etc/cni/net.d/"
cniBinDir = "/opt/cni/bin/"
cgroupManager = "cgroupfs"
cgroupManager = oci.CgroupfsCgroupsManager
lockPath = "/run/crio.lock"
containerExitsDir = "/var/run/kpod/exits"
)

View file

@ -273,6 +273,11 @@ func (s *Sandbox) CgroupParent() string {
return s.cgroupParent
}
// UpdateCgroupParent replaces the cgroup parent stored on the sandbox with
// the given value. The field is overwritten unconditionally.
func (s *Sandbox) UpdateCgroupParent(parent string) {
s.cgroupParent = parent
}
// Privileged returns whether or not the containers in the sandbox are
// privileged containers
func (s *Sandbox) Privileged() bool {

View file

@ -13,6 +13,7 @@ import (
"syscall"
"time"
"github.com/containerd/cgroups"
"github.com/kubernetes-incubator/cri-o/utils"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
@ -29,6 +30,11 @@ const (
ContainerStateStopped = "stopped"
// ContainerCreateTimeout represents the value of container creating timeout
ContainerCreateTimeout = 10 * time.Second
// CgroupfsCgroupsManager represents cgroupfs native cgroup manager
CgroupfsCgroupsManager = "cgroupfs"
// SystemdCgroupsManager represents systemd native cgroup manager
SystemdCgroupsManager = "systemd"
)
// New creates a new Runtime with options provided
@ -140,7 +146,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
defer parentStartPipe.Close()
var args []string
if r.cgroupManager == "systemd" {
if r.cgroupManager == SystemdCgroupsManager {
args = append(args, "-s")
}
args = append(args, "-c", c.id)
@ -186,11 +192,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
childStartPipe.Close()
// Move conmon to specified cgroup
if cgroupParent != "" {
if r.cgroupManager == "systemd" {
logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id))
if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil {
logrus.Warnf("Failed to add conmon to sandbox cgroup: %v", err)
if r.cgroupManager == SystemdCgroupsManager {
logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id))
if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil {
logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
}
} else {
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/crio-conmon-"+c.id)), &rspec.LinuxResources{})
if err != nil {
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
} else {
defer control.Delete()
if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil {
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
}
}
}

View file

@ -38,6 +38,10 @@ const (
seccompUnconfined = "unconfined"
seccompRuntimeDefault = "runtime/default"
seccompLocalhostPrefix = "localhost/"
scopePrefix = "crio"
defaultCgroupfsParent = "/crio"
defaultSystemdParent = "system.slice"
)
func addOCIBindMounts(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) ([]oci.ContainerVolume, error) {
@ -471,14 +475,22 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
specgen.SetProcessOOMScoreAdj(int(oomScoreAdj))
}
if sb.CgroupParent() != "" {
if s.config.CgroupManager == "systemd" {
cgPath := sb.CgroupParent() + ":" + "crio" + ":" + containerID
specgen.SetLinuxCgroupsPath(cgPath)
} else {
specgen.SetLinuxCgroupsPath(sb.CgroupParent() + "/" + containerID)
}
var cgPath string
parent := defaultCgroupfsParent
useSystemd := s.config.CgroupManager == oci.SystemdCgroupsManager
if useSystemd {
parent = defaultSystemdParent
}
if sb.CgroupParent() != "" {
parent = sb.CgroupParent()
}
if useSystemd {
cgPath = parent + ":" + scopePrefix + ":" + containerID
} else {
cgPath = filepath.Join(parent, scopePrefix+"-"+containerID)
}
specgen.SetLinuxCgroupsPath(cgPath)
sb.UpdateCgroupParent(parent)
capabilities := linux.GetSecurityContext().GetCapabilities()
toCAPPrefixed := func(cap string) string {

View file

@ -325,7 +325,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
// setup cgroup settings
cgroupParent := req.GetConfig().GetLinux().CgroupParent
if cgroupParent != "" {
if s.config.CgroupManager == "systemd" {
if s.config.CgroupManager == oci.SystemdCgroupsManager {
cgPath, err := convertCgroupNameToSystemd(cgroupParent, false)
if err != nil {
return nil, err

View file

@ -96,3 +96,4 @@ github.com/prometheus/common 13ba4ddd0caa9c28ca7b7bffe1dfa9ed8d5ef207
github.com/prometheus/procfs 65c1f6f8f0fc1e2185eb9863a3bc751496404259
github.com/matttproud/golang_protobuf_extensions fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a
github.com/beorn7/perks 3ac7bf7a47d159a033b107610db8a1b6575507a4
github.com/containerd/cgroups 7a5fdd8330119dc70d850260db8f3594d89d6943

201
vendor/github.com/containerd/cgroups/LICENSE generated vendored Normal file
View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

112
vendor/github.com/containerd/cgroups/README.md generated vendored Normal file
View file

@ -0,0 +1,112 @@
# cgroups
[![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups)
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
Go package for creating, managing, inspecting, and destroying cgroups.
The resources format for settings on the cgroup uses the OCI runtime-spec found
[here](https://github.com/opencontainers/runtime-spec).
## Examples
### Create a new cgroup
This creates a new cgroup using a static path for all subsystems under `/test`.
* /sys/fs/cgroup/cpu/test
* /sys/fs/cgroup/memory/test
* etc....
It uses a single hierarchy and specifies cpu shares as a resource constraint and
uses the v1 implementation of cgroups.
```go
shares := uint64(100)
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{
CPU: &specs.CPU{
Shares: &shares,
},
})
defer control.Delete()
```
### Create with systemd slice support
```go
control, err := cgroups.New(cgroups.Systemd, cgroups.Slice("system.slice", "runc-test"), &specs.LinuxResources{
CPU: &specs.CPU{
Shares: &shares,
},
})
```
### Load an existing cgroup
```go
control, err = cgroups.Load(cgroups.V1, cgroups.StaticPath("/test"))
```
### Add a process to the cgroup
```go
if err := control.Add(cgroups.Process{Pid:1234}); err != nil {
}
```
### Update the cgroup
To update the resources applied in the cgroup
```go
shares = uint64(200)
if err := control.Update(&specs.LinuxResources{
CPU: &specs.CPU{
Shares: &shares,
},
}); err != nil {
}
```
### Freeze and Thaw the cgroup
```go
if err := control.Freeze(); err != nil {
}
if err := control.Thaw(); err != nil {
}
```
### List all processes in the cgroup or recursively
```go
processes, err := control.Processes(cgroups.Devices, recursive)
```
### Get Stats on the cgroup
```go
stats, err := control.Stat()
```
By adding `cgroups.IgnoreNotExist` all non-existent files will be ignored, e.g. swap memory stats without swap enabled
```go
stats, err := control.Stat(cgroups.IgnoreNotExist)
```
### Move process across cgroups
This allows you to take processes from one cgroup and move them to another.
```go
err := control.MoveTo(destination)
```
### Create subcgroup
```go
subCgroup, err := control.New("child", resources)
```

323
vendor/github.com/containerd/cgroups/blkio.go generated vendored Normal file
View file

@ -0,0 +1,323 @@
package cgroups
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewBlkio returns a blkio controller rooted at the blkio subsystem directory
// underneath the given cgroup root.
func NewBlkio(root string) *blkioController {
	controller := new(blkioController)
	controller.root = filepath.Join(root, string(Blkio))
	return controller
}
// blkioController operates on the blkio cgroup subsystem.
type blkioController struct {
// root is the directory that cgroup paths are joined onto (see Path).
root string
}
// Name returns the name of the subsystem this controller handles, Blkio.
func (b *blkioController) Name() Name {
return Blkio
}
// Path returns the location of the given cgroup path inside the controller's
// root directory.
func (b *blkioController) Path(path string) string {
	full := filepath.Join(b.root, path)
	return full
}
// Create makes the blkio cgroup directory for path and, when block IO
// resources are supplied, writes every configured setting to its
// "blkio.<name>" file inside that directory. The first failing write aborts
// the operation and its error is returned.
func (b *blkioController) Create(path string, resources *specs.LinuxResources) error {
	dir := b.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	blockIO := resources.BlockIO
	if blockIO == nil {
		// Nothing to configure beyond the directory itself.
		return nil
	}
	for _, setting := range createBlkioSettings(blockIO) {
		if setting.value == nil {
			continue
		}
		target := filepath.Join(dir, fmt.Sprintf("blkio.%s", setting.name))
		if err := ioutil.WriteFile(target, setting.format(setting.value), defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// Update applies the given resources to the cgroup at path. It delegates to
// Create, so the directory is (re)created if needed and every configured blkio
// setting is written again.
func (b *blkioController) Update(path string, resources *specs.LinuxResources) error {
return b.Create(path, resources)
}
// Stat fills stats.Blkio with the blkio statistics of the cgroup at path.
//
// The throttle counters are always read. When the CFQ file
// blkio.io_serviced_recursive exists, the CFQ counters are read as well. Note
// that the throttle and CFQ variants of io_serviced and io_service_bytes
// append into the same slices.
func (b *blkioController) Stat(path string, stats *Stats) error {
	blkioStats := &BlkioStat{}
	stats.Blkio = blkioStats

	settings := []blkioStatSettings{
		{name: "throttle.io_serviced", entry: &blkioStats.IoServicedRecursive},
		{name: "throttle.io_service_bytes", entry: &blkioStats.IoServiceBytesRecursive},
	}

	// Try to read CFQ stats available on all CFQ enabled kernels first
	cfqProbe := filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", "io_serviced_recursive"))
	if _, err := os.Lstat(cfqProbe); err == nil {
		cfq := []blkioStatSettings{
			{name: "sectors_recursive", entry: &blkioStats.SectorsRecursive},
			{name: "io_service_bytes_recursive", entry: &blkioStats.IoServiceBytesRecursive},
			{name: "io_serviced_recursive", entry: &blkioStats.IoServicedRecursive},
			{name: "io_queued_recursive", entry: &blkioStats.IoQueuedRecursive},
			{name: "io_service_time_recursive", entry: &blkioStats.IoServiceTimeRecursive},
			{name: "io_wait_time_recursive", entry: &blkioStats.IoWaitTimeRecursive},
			{name: "io_merged_recursive", entry: &blkioStats.IoMergedRecursive},
			{name: "time_recursive", entry: &blkioStats.IoTimeRecursive},
		}
		settings = append(settings, cfq...)
	}

	devices, err := getDevices("/dev")
	if err != nil {
		return err
	}
	for i := range settings {
		if err := b.readEntry(devices, path, settings[i].name, settings[i].entry); err != nil {
			return err
		}
	}
	return nil
}
// readEntry parses the "blkio.<name>" file of the cgroup at path and appends
// one BlkioEntry per data line to *entry.
//
// Lines are split on spaces and colons and are expected to look like
// "major:minor value" or "major:minor op value". A two-field line starting
// with "Total" is skipped; any other line with fewer than three fields is
// rejected. The device name is looked up in devices by major/minor number and
// is empty when the device is not in the map.
//
// It returns the first open, parse or read error encountered.
func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]BlkioEntry) error {
	f, err := os.Open(filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", name)))
	if err != nil {
		return err
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		// format: dev type amount
		fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
		if len(fields) < 3 {
			if len(fields) == 2 && fields[0] == "Total" {
				// skip total line
				continue
			}
			return fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
		}
		majorNum, err := strconv.ParseUint(fields[0], 10, 64)
		if err != nil {
			return err
		}
		minorNum, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return err
		}
		// The operation column is only present on four-field lines.
		op := ""
		valueField := 2
		if len(fields) == 4 {
			op = fields[2]
			valueField = 3
		}
		v, err := strconv.ParseUint(fields[valueField], 10, 64)
		if err != nil {
			return err
		}
		*entry = append(*entry, BlkioEntry{
			Device: devices[deviceKey{majorNum, minorNum}],
			Major:  majorNum,
			Minor:  minorNum,
			Op:     op,
			Value:  v,
		})
	}
	// A scanner only reports a read failure once Scan has returned false, so
	// the error has to be checked after the loop rather than inside it.
	return sc.Err()
}
// createBlkioSettings translates the block IO resources into the ordered list
// of blkio files to write: weight, leaf_weight, the per-device weights and
// finally the four throttle lists.
//
// The weight and leaf_weight entries are only emitted when the corresponding
// pointer is set. A nil *uint16 stored in the interface-typed value field
// would still compare unequal to nil, and the uintf formatter would then
// dereference the nil pointer.
func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings {
	var settings []blkioSettings
	if blkio.Weight != nil {
		settings = append(settings, blkioSettings{
			name:   "weight",
			value:  blkio.Weight,
			format: uintf,
		})
	}
	if blkio.LeafWeight != nil {
		settings = append(settings, blkioSettings{
			name:   "leaf_weight",
			value:  blkio.LeafWeight,
			format: uintf,
		})
	}
	for _, wd := range blkio.WeightDevice {
		settings = append(settings,
			blkioSettings{
				name:   "weight_device",
				value:  wd,
				format: weightdev,
			},
			blkioSettings{
				name:   "leaf_weight_device",
				value:  wd,
				format: weightleafdev,
			})
	}
	for _, t := range []struct {
		name string
		list []specs.LinuxThrottleDevice
	}{
		{
			name: "throttle.read_bps_device",
			list: blkio.ThrottleReadBpsDevice,
		},
		{
			name: "throttle.read_iops_device",
			list: blkio.ThrottleReadIOPSDevice,
		},
		{
			name: "throttle.write_bps_device",
			list: blkio.ThrottleWriteBpsDevice,
		},
		{
			name: "throttle.write_iops_device",
			list: blkio.ThrottleWriteIOPSDevice,
		},
	} {
		for _, td := range t.list {
			settings = append(settings, blkioSettings{
				name:   t.name,
				value:  td,
				format: throttleddev,
			})
		}
	}
	return settings
}
// blkioSettings describes a single blkio file to write when a cgroup is
// created or updated.
type blkioSettings struct {
// name is the file name suffix that follows the "blkio." prefix.
name string
// value is the setting handed to format; entries whose value is nil are skipped by Create.
value interface{}
// format renders value into the bytes written to the file.
format func(v interface{}) []byte
}
// blkioStatSettings pairs a blkio statistics file with the slice that receives
// its parsed entries.
type blkioStatSettings struct {
// name is the file name suffix that follows the "blkio." prefix.
name string
// entry points at the slice that parsed entries are appended to.
entry *[]BlkioEntry
}
// uintf formats a *uint16 setting as its base-10 text. The argument must hold
// a non-nil *uint16; anything else panics.
func uintf(v interface{}) []byte {
	weight := v.(*uint16)
	return strconv.AppendUint(nil, uint64(*weight), 10)
}
// weightdev formats a specs.LinuxWeightDevice as "major:minor weight" for the
// blkio weight_device file. The argument must hold a specs.LinuxWeightDevice.
//
// NOTE(review): if Weight is a pointer in the vendored runtime-spec, %d prints
// the pointer rather than the weight it refers to - confirm.
func weightdev(v interface{}) []byte {
	device := v.(specs.LinuxWeightDevice)
	line := fmt.Sprintf("%d:%d %d", device.Major, device.Minor, device.Weight)
	return []byte(line)
}
// weightleafdev formats a specs.LinuxWeightDevice as "major:minor leafWeight"
// for the blkio leaf_weight_device file. The argument must hold a
// specs.LinuxWeightDevice.
//
// NOTE(review): if LeafWeight is a pointer in the vendored runtime-spec, %d
// prints the pointer rather than the weight it refers to - confirm.
func weightleafdev(v interface{}) []byte {
	device := v.(specs.LinuxWeightDevice)
	line := fmt.Sprintf("%d:%d %d", device.Major, device.Minor, device.LeafWeight)
	return []byte(line)
}
// throttleddev formats a specs.LinuxThrottleDevice as "major:minor rate" for
// the blkio throttle files. The argument must hold a
// specs.LinuxThrottleDevice.
func throttleddev(v interface{}) []byte {
	device := v.(specs.LinuxThrottleDevice)
	line := fmt.Sprintf("%d:%d %d", device.Major, device.Minor, device.Rate)
	return []byte(line)
}
// splitBlkioStatLine reports whether r separates fields in a blkio statistics
// line; the separators are the space and the colon.
func splitBlkioStatLine(r rune) bool {
	switch r {
	case ' ', ':':
		return true
	default:
		return false
	}
}
// deviceKey identifies a device by its major and minor numbers; it is the key
// of the device map built by getDevices.
type deviceKey struct {
major, minor uint64
}
// getDevices walks the tree below path and returns a map from device
// major/minor number to the first path found for that device.
//
// The directories pts, shm, fd, mqueue, .lxc and .lxd-mounts are not
// descended into, and "console" as well as every non-device file is ignored.
// Any error reported by the walk aborts it and is returned.
func getDevices(path string) (map[deviceKey]string, error) {
	// TODO(stevvooe): We are ignoring lots of errors. It might be kind of
	// challenging to debug this if we aren't mapping devices correctly.
	// Consider logging these errors.
	devices := make(map[deviceKey]string)

	visit := func(p string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if fi.IsDir() {
			switch fi.Name() {
			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
				return filepath.SkipDir
			}
			return nil
		}
		if fi.Name() == "console" {
			return nil
		}
		if fi.Mode()&os.ModeDevice == 0 {
			// skip non-devices
			return nil
		}
		st, ok := fi.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("%s: unable to convert to system stat", p)
		}
		key := deviceKey{major(st.Rdev), minor(st.Rdev)}
		// Devices may be mapped several times; keep the first path seen.
		if _, seen := devices[key]; !seen {
			devices[key] = p
		}
		return nil
	}

	if err := filepath.Walk(path, visit); err != nil {
		return nil, err
	}
	return devices, nil
}
// major extracts the major number from a device number: bits 8 through 19.
func major(devNumber uint64) uint64 {
	const majorBits = 0xfff00
	return (devNumber & majorBits) >> 8
}
// minor extracts the minor number from a device number: the low 8 bits
// combined with bits 20 through 31 shifted down by 12.
func minor(devNumber uint64) uint64 {
	low := devNumber & 0xff
	high := (devNumber >> 12) & 0xfff00
	return high | low
}

389
vendor/github.com/containerd/cgroups/cgroup.go generated vendored Normal file
View file

@ -0,0 +1,389 @@
package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// New creates a cgroup at path in every subsystem of the hierarchy,
// initializing each subsystem with the given resources, and returns a handle
// for controlling it. The first error from the hierarchy or from a subsystem
// initialization is returned.
func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources) (Cgroup, error) {
	subsystems, err := hierarchy()
	if err != nil {
		return nil, err
	}
	for i := range subsystems {
		if initErr := initializeSubsystem(subsystems[i], path, resources); initErr != nil {
			return nil, initErr
		}
	}
	created := &cgroup{
		path:       path,
		subsystems: subsystems,
	}
	return created, nil
}
// Load returns a handle for an already existing cgroup at path. It returns
// ErrCgroupDeleted when the cgroup directory is missing from any subsystem
// that exposes a path.
func Load(hierarchy Hierarchy, path Path) (Cgroup, error) {
	subsystems, err := hierarchy()
	if err != nil {
		return nil, err
	}
	// check that the subsystems still exist
	for _, sub := range pathers(subsystems) {
		location, err := path(sub.Name())
		if err != nil {
			return nil, err
		}
		_, statErr := os.Lstat(sub.Path(location))
		switch {
		case statErr == nil:
			continue
		case os.IsNotExist(statErr):
			return nil, ErrCgroupDeleted
		default:
			return nil, statErr
		}
	}
	loaded := &cgroup{
		path:       path,
		subsystems: subsystems,
	}
	return loaded, nil
}
// cgroup is the Cgroup implementation returned by New and Load.
type cgroup struct {
// path resolves a subsystem name to this cgroup's path within that subsystem.
path Path
// subsystems are the controllers this cgroup spans.
subsystems []Subsystem
// mu serializes the methods that lock it.
mu sync.Mutex
// err, once set (for example to ErrCgroupDeleted by Delete), is returned by
// the methods that check it.
err error
}
// New creates a child cgroup called name beneath the receiver in every
// subsystem, initialized with the given resources. It fails with the stored
// error when the receiver already carries one.
func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return nil, err
	}
	childPath := subPath(c.path, name)
	for i := range c.subsystems {
		if err := initializeSubsystem(c.subsystems[i], childPath, resources); err != nil {
			return nil, err
		}
	}
	child := &cgroup{
		path:       childPath,
		subsystems: c.subsystems,
	}
	return child, nil
}
// Subsystems returns all the subsystems that are currently being
// consumed by the group. The returned slice is the cgroup's own slice, not a
// copy, and it is read without taking the cgroup's lock.
func (c *cgroup) Subsystems() []Subsystem {
return c.subsystems
}
// Add moves the given process into this cgroup in every subsystem that
// exposes a path. It returns ErrInvalidPid for a non-positive pid and the
// stored error when the cgroup already carries one.
func (c *cgroup) Add(process Process) error {
	if process.Pid <= 0 {
		return ErrInvalidPid
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return err
	}
	return c.add(process)
}
// add writes the process id into the cgroup.procs file of every subsystem
// that exposes a path, stopping at the first error. It does not take the
// cgroup's lock itself.
func (c *cgroup) add(process Process) error {
	pid := []byte(strconv.Itoa(process.Pid))
	for _, sub := range pathers(c.subsystems) {
		location, err := c.path(sub.Name())
		if err != nil {
			return err
		}
		procsFile := filepath.Join(sub.Path(location), cgroupProcs)
		if err := ioutil.WriteFile(procsFile, pid, defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// Delete removes the cgroup from every registered subsystem. Subsystems that
// implement their own deletion are asked to delete the cgroup; for the others
// the cgroup directory is removed directly.
//
// Removal failures are collected and reported together in one error. A
// failure to resolve a subsystem path aborts immediately. On success the
// cgroup is marked deleted, so later calls return ErrCgroupDeleted.
func (c *cgroup) Delete() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.err != nil {
		return c.err
	}
	var failed []string
	for _, s := range c.subsystems {
		switch sub := s.(type) {
		case deleter:
			location, err := c.path(s.Name())
			if err != nil {
				return err
			}
			if err := sub.Delete(location); err != nil {
				failed = append(failed, string(s.Name()))
			}
		case pather:
			location, err := c.path(s.Name())
			if err != nil {
				return err
			}
			dir := sub.Path(location)
			if err := remove(dir); err != nil {
				failed = append(failed, dir)
			}
		}
	}
	if len(failed) > 0 {
		return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(failed, ", "))
	}
	c.err = ErrCgroupDeleted
	return nil
}
// Stat returns the current stats for the cgroup.
//
// Every subsystem that can report statistics is queried in its own goroutine.
// An error from a subsystem is passed through each handler in turn, and every
// non-nil handler result is recorded. When no handler is supplied, errors are
// passed through unchanged. The first recorded error, if any, is returned in
// place of the stats.
func (c *cgroup) Stat(handlers ...ErrorHandler) (*Stats, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.err != nil {
		return nil, c.err
	}
	if len(handlers) == 0 {
		handlers = append(handlers, errPassthrough)
	}
	var (
		stats = &Stats{}
		wg    = &sync.WaitGroup{}
		// Each subsystem may record one error per handler. The buffer must
		// hold all of them, otherwise a sender blocks, wg.Wait never returns
		// and the cgroup stays locked.
		errs = make(chan error, len(c.subsystems)*len(handlers))
	)
	for _, s := range c.subsystems {
		ss, ok := s.(stater)
		if !ok {
			continue
		}
		sp, err := c.path(s.Name())
		if err != nil {
			return nil, err
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := ss.Stat(sp, stats); err != nil {
				for _, eh := range handlers {
					if herr := eh(err); herr != nil {
						errs <- herr
					}
				}
			}
		}()
	}
	wg.Wait()
	close(errs)
	// The channel is closed, so this receive yields the first recorded error
	// or reports that none was recorded.
	if err, ok := <-errs; ok {
		return nil, err
	}
	return stats, nil
}
// Update applies the new resource values to every subsystem that supports
// updates, stopping at the first error.
//
// Be prepared to handle EBUSY when trying to update a cgroup with
// live processes and other operations like Stats being performed at the
// same time
func (c *cgroup) Update(resources *specs.LinuxResources) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return err
	}
	for _, s := range c.subsystems {
		u, ok := s.(updater)
		if !ok {
			continue
		}
		location, err := c.path(s.Name())
		if err != nil {
			return err
		}
		if err := u.Update(location, resources); err != nil {
			return err
		}
	}
	return nil
}
// Processes lists the processes found in the cgroup for the given subsystem.
// With recursive set, processes of nested cgroups are included as well. It
// fails with the stored error when the cgroup already carries one.
func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return nil, err
	}
	return c.processes(subsystem, recursive)
}
// processes walks the cgroup directory of the given subsystem and collects
// the processes listed in every cgroup.procs file it finds. Without
// recursive, only the cgroup's own directory is inspected. It does not take
// the cgroup's lock itself.
//
// An error is returned when the subsystem is not part of this cgroup or does
// not expose a filesystem path. Previously both cases panicked on the type
// assertion.
func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) {
	s := c.getSubsystem(subsystem)
	sp, err := c.path(subsystem)
	if err != nil {
		return nil, err
	}
	if s == nil {
		return nil, fmt.Errorf("cgroups: %s doesn't exist in %s subsystem", sp, subsystem)
	}
	ps, ok := s.(pather)
	if !ok {
		return nil, fmt.Errorf("cgroups: %s subsystem does not expose a path", subsystem)
	}
	path := ps.Path(sp)
	var processes []Process
	err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if !recursive && info.IsDir() {
			// Visit the cgroup's own directory but none of its children.
			if p == path {
				return nil
			}
			return filepath.SkipDir
		}
		dir, name := filepath.Split(p)
		if name != cgroupProcs {
			return nil
		}
		procs, err := readPids(dir, subsystem)
		if err != nil {
			return err
		}
		processes = append(processes, procs...)
		return nil
	})
	return processes, err
}
// Freeze freezes the cgroup through its freezer subsystem. It returns
// ErrFreezerNotSupported when the cgroup has no freezer subsystem.
func (c *cgroup) Freeze() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return err
	}
	freezer := c.getSubsystem(Freezer)
	if freezer == nil {
		return ErrFreezerNotSupported
	}
	location, err := c.path(Freezer)
	if err != nil {
		return err
	}
	controller := freezer.(*freezerController)
	return controller.Freeze(location)
}
// Thaw thaws the cgroup through its freezer subsystem. It returns
// ErrFreezerNotSupported when the cgroup has no freezer subsystem.
func (c *cgroup) Thaw() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return err
	}
	freezer := c.getSubsystem(Freezer)
	if freezer == nil {
		return ErrFreezerNotSupported
	}
	location, err := c.path(Freezer)
	if err != nil {
		return err
	}
	controller := freezer.(*freezerController)
	return controller.Thaw(location)
}
// OOMEventFD returns the out-of-memory event file descriptor obtained from
// the cgroup's memory subsystem. It returns ErrMemoryNotSupported when the
// cgroup has no memory subsystem.
func (c *cgroup) OOMEventFD() (uintptr, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return 0, err
	}
	memory := c.getSubsystem(Memory)
	if memory == nil {
		return 0, ErrMemoryNotSupported
	}
	location, err := c.path(Memory)
	if err != nil {
		return 0, err
	}
	controller := memory.(*memoryController)
	return controller.OOMEventFD(location)
}
// State reports the state of the cgroup. It returns Deleted when the cgroup
// no longer exists, Thawed when there is no freezer subsystem, Unknown when
// the freezer state cannot be determined, and otherwise the state reported by
// the freezer.
func (c *cgroup) State() State {
	c.mu.Lock()
	defer c.mu.Unlock()
	// Refresh the stored error in case the cgroup was removed externally.
	c.checkExists()
	if c.err != nil && c.err == ErrCgroupDeleted {
		return Deleted
	}
	freezer := c.getSubsystem(Freezer)
	if freezer == nil {
		return Thawed
	}
	location, err := c.path(Freezer)
	if err == nil {
		if current, stateErr := freezer.(*freezerController).state(location); stateErr == nil {
			return current
		}
	}
	return Unknown
}
// MoveTo adds every process found in this cgroup to destination. It collects
// the processes recursively for one subsystem after another and stops at the
// first error.
func (c *cgroup) MoveTo(destination Cgroup) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.err; err != nil {
		return err
	}
	for _, s := range c.subsystems {
		found, err := c.processes(s.Name(), true)
		if err != nil {
			return err
		}
		for i := range found {
			if err := destination.Add(found[i]); err != nil {
				return err
			}
		}
	}
	return nil
}
// getSubsystem returns the first subsystem of the cgroup whose name is n, or
// nil when the cgroup has no such subsystem.
func (c *cgroup) getSubsystem(n Name) Subsystem {
	for i := range c.subsystems {
		if candidate := c.subsystems[i]; candidate.Name() == n {
			return candidate
		}
	}
	return nil
}
// checkExists records ErrCgroupDeleted in c.err as soon as the directory of
// one of the cgroup's path-aware subsystems is found to be missing. It gives
// up silently if a subsystem path cannot be resolved; Lstat failures other
// than "not exist" are ignored.
func (c *cgroup) checkExists() {
	for _, sub := range pathers(c.subsystems) {
		rel, err := c.path(sub.Name())
		if err != nil {
			return
		}
		_, statErr := os.Lstat(sub.Path(rel))
		if statErr != nil && os.IsNotExist(statErr) {
			c.err = ErrCgroupDeleted
			return
		}
	}
}

58
vendor/github.com/containerd/cgroups/control.go generated vendored Normal file
View file

@ -0,0 +1,58 @@
package cgroups
import (
"os"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
const (
	// cgroupProcs is the name of the per-cgroup file that is looked for when
	// collecting the processes of a cgroup.
	cgroupProcs = "cgroup.procs"
	// defaultDirPerm is the mode the controllers pass to os.MkdirAll when
	// creating cgroup directories.
	defaultDirPerm = 0755
)
// defaultFilePerm is a var so that the test framework can change the file mode
// of all files created while the tests are running. The difference between the
// tests and real world use is that files like "cgroup.procs" already exist when
// writing to a real cgroup filesystem, but do not exist beforehand in the tests.
// It is set to a non-zero value in the test code.
var defaultFilePerm = os.FileMode(0)
// Process describes a single process together with the subsystem and path it
// was found in.
type Process struct {
	// Subsystem is the name of the subsystem that the process is in
	Subsystem Name
	// Pid is the process id of the process
	Pid int
	// Path is the full path of the subsystem and location that the process is in
	Path string
}
// Cgroup handles interactions with the individual groups to perform
// actions on them; it is the main interface to this cgroup package.
type Cgroup interface {
	// New creates a new cgroup under the calling cgroup
	New(string, *specs.LinuxResources) (Cgroup, error)
	// Add adds a process to the cgroup
	Add(Process) error
	// Delete removes the cgroup as a whole
	Delete() error
	// MoveTo moves all the processes under the calling cgroup to the provided one;
	// subsystems are moved one at a time
	MoveTo(Cgroup) error
	// Stat returns the stats for all subsystems in the cgroup
	Stat(...ErrorHandler) (*Stats, error)
	// Update updates all the subsystems with the provided resource changes
	Update(resources *specs.LinuxResources) error
	// Processes returns all the processes in a selected subsystem for the cgroup
	Processes(Name, bool) ([]Process, error)
	// Freeze freezes or pauses all processes inside the cgroup
	Freeze() error
	// Thaw thaws or resumes all processes inside the cgroup
	Thaw() error
	// OOMEventFD returns the memory subsystem's event fd for OOM events
	OOMEventFD() (uintptr, error)
	// State returns the cgroup's current state
	State() State
	// Subsystems returns all the subsystems in the cgroup
	Subsystems() []Subsystem
}

120
vendor/github.com/containerd/cgroups/cpu.go generated vendored Normal file
View file

@ -0,0 +1,120 @@
package cgroups
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewCpu returns a controller for the cpu subsystem whose cgroups live in the
// Cpu directory below root.
func NewCpu(root string) *cpuController {
	controllerRoot := filepath.Join(root, string(Cpu))
	return &cpuController{root: controllerRoot}
}

// cpuController reads and writes the cpu.* files of cgroups below root.
type cpuController struct {
	root string
}

// Name returns Cpu, the name of the subsystem handled by this controller.
func (c *cpuController) Name() Name {
	return Cpu
}

// Path returns path joined onto the controller's root directory.
func (c *cpuController) Path(path string) string {
	return filepath.Join(c.root, path)
}
// Create makes the cgroup directory for path and, when resources.CPU is set,
// writes every non-nil CPU setting to its cpu.<name> file. Settings are
// written in a fixed order: rt_period_us, rt_runtime_us, shares,
// cfs_period_us, cfs_quota_us. The first failing write aborts the call.
func (c *cpuController) Create(path string, resources *specs.LinuxResources) error {
	dir := c.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	cpu := resources.CPU
	if cpu == nil {
		return nil
	}
	// each entry carries either an unsigned or a signed value, never both
	settings := []struct {
		name   string
		ivalue *int64
		uvalue *uint64
	}{
		{name: "rt_period_us", uvalue: cpu.RealtimePeriod},
		{name: "rt_runtime_us", ivalue: cpu.RealtimeRuntime},
		{name: "shares", uvalue: cpu.Shares},
		{name: "cfs_period_us", uvalue: cpu.Period},
		{name: "cfs_quota_us", ivalue: cpu.Quota},
	}
	for _, s := range settings {
		var text string
		switch {
		case s.uvalue != nil:
			text = strconv.FormatUint(*s.uvalue, 10)
		case s.ivalue != nil:
			text = strconv.FormatInt(*s.ivalue, 10)
		default:
			// setting not requested
			continue
		}
		target := filepath.Join(dir, fmt.Sprintf("cpu.%s", s.name))
		if err := ioutil.WriteFile(target, []byte(text), defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// Update applies resources to the cgroup at path. It is implemented by
// delegating to Create, so the directory is (re)created if missing and every
// non-nil CPU setting is rewritten.
func (c *cpuController) Update(path string, resources *specs.LinuxResources) error {
	err := c.Create(path, resources)
	return err
}
// Stat reads the cgroup's cpu.stat file and stores the throttling counters
// (nr_periods, nr_throttled, throttled_time) in stats.Cpu.Throttling. Other
// keys in the file are ignored.
//
// An error is returned if the file cannot be opened, a line cannot be parsed,
// or the scanner stops early because of a read error.
func (c *cpuController) Stat(path string, stats *Stats) error {
	f, err := os.Open(filepath.Join(c.Path(path), "cpu.stat"))
	if err != nil {
		return err
	}
	defer f.Close()
	// get or create the cpu field because cpuacct can also set values on this struct
	stats.cpuMu.Lock()
	cpu := stats.Cpu
	if cpu == nil {
		cpu = &CpuStat{}
		stats.Cpu = cpu
	}
	stats.cpuMu.Unlock()
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		key, v, err := parseKV(sc.Text())
		if err != nil {
			return err
		}
		switch key {
		case "nr_periods":
			cpu.Throttling.Periods = v
		case "nr_throttled":
			cpu.Throttling.ThrottledPeriods = v
		case "throttled_time":
			cpu.Throttling.ThrottledTime = v
		}
	}
	// Scan reports failure only by returning false, so the scanner error has
	// to be inspected once the loop is over; inside the loop it is always nil.
	return sc.Err()
}

112
vendor/github.com/containerd/cgroups/cpuacct.go generated vendored Normal file
View file

@ -0,0 +1,112 @@
package cgroups
import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// nanosecondsInSecond is the factor used when converting clock ticks read
// from cpuacct.stat into nanoseconds.
const nanosecondsInSecond = 1000000000

// clockTicks is the value returned by getClockTicks at package
// initialization; tick counts are divided by it when converted to time.
var clockTicks = getClockTicks()
// NewCpuacct returns a controller for the cpuacct subsystem whose cgroups
// live in the Cpuacct directory below root.
func NewCpuacct(root string) *cpuacctController {
	controllerRoot := filepath.Join(root, string(Cpuacct))
	return &cpuacctController{root: controllerRoot}
}

// cpuacctController reads the cpuacct.* accounting files of cgroups below root.
type cpuacctController struct {
	root string
}

// Name returns Cpuacct, the name of the subsystem handled by this controller.
func (c *cpuacctController) Name() Name {
	return Cpuacct
}

// Path returns path joined onto the controller's root directory.
func (c *cpuacctController) Path(path string) string {
	return filepath.Join(c.root, path)
}
// Stat fills stats.Cpu.Usage for the cgroup at path: user and kernel time
// from cpuacct.stat, the total from cpuacct.usage and the per-cpu values from
// cpuacct.usage_percpu. All three sources are read before stats is touched,
// so a read failure leaves stats unchanged.
func (c *cpuacctController) Stat(path string, stats *Stats) error {
	userTime, kernelTime, err := c.getUsage(path)
	if err != nil {
		return err
	}
	totalTime, err := readUint(filepath.Join(c.Path(path), "cpuacct.usage"))
	if err != nil {
		return err
	}
	perCPU, err := c.percpuUsage(path)
	if err != nil {
		return err
	}
	// the cpu controller shares this struct, so create it under the lock
	stats.cpuMu.Lock()
	if stats.Cpu == nil {
		stats.Cpu = &CpuStat{}
	}
	cpu := stats.Cpu
	stats.cpuMu.Unlock()
	cpu.Usage.Total = totalTime
	cpu.Usage.User = userTime
	cpu.Usage.Kernel = kernelTime
	cpu.Usage.PerCpu = perCPU
	return nil
}
// percpuUsage parses the whitespace separated values of the cgroup's
// cpuacct.usage_percpu file. The result is nil when the file holds no fields.
func (c *cpuacctController) percpuUsage(path string) ([]uint64, error) {
	raw, err := ioutil.ReadFile(filepath.Join(c.Path(path), "cpuacct.usage_percpu"))
	if err != nil {
		return nil, err
	}
	var perCPU []uint64
	for _, field := range strings.Fields(string(raw)) {
		value, err := strconv.ParseUint(field, 10, 64)
		if err != nil {
			return nil, err
		}
		perCPU = append(perCPU, value)
	}
	return perCPU, nil
}
// getUsage reads the cgroup's cpuacct.stat file, which must consist of exactly
// the four fields "user <n> system <n>", and returns both tick counts
// converted with nanosecondsInSecond and clockTicks.
func (c *cpuacctController) getUsage(path string) (user uint64, kernel uint64, err error) {
	statPath := filepath.Join(c.Path(path), "cpuacct.stat")
	raw, err := ioutil.ReadFile(statPath)
	if err != nil {
		return 0, 0, err
	}
	fields := strings.Fields(string(raw))
	if len(fields) != 4 {
		return 0, 0, fmt.Errorf("%q is expected to have 4 fields", statPath)
	}
	// ticksAt checks the label at index and parses the number that follows it
	ticksAt := func(index int, label string) (uint64, error) {
		if fields[index] != label {
			return 0, fmt.Errorf("expected field %q but found %q in %q", label, fields[index], statPath)
		}
		return strconv.ParseUint(fields[index+1], 10, 64)
	}
	if user, err = ticksAt(0, "user"); err != nil {
		return 0, 0, err
	}
	if kernel, err = ticksAt(2, "system"); err != nil {
		return 0, 0, err
	}
	userNanos := (user * nanosecondsInSecond) / clockTicks
	kernelNanos := (kernel * nanosecondsInSecond) / clockTicks
	return userNanos, kernelNanos, nil
}

139
vendor/github.com/containerd/cgroups/cpuset.go generated vendored Normal file
View file

@ -0,0 +1,139 @@
package cgroups
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewCputset returns a controller for the cpuset subsystem whose cgroups live
// in the Cpuset directory below root.
func NewCputset(root string) *cpusetController {
	controllerRoot := filepath.Join(root, string(Cpuset))
	return &cpusetController{root: controllerRoot}
}

// cpusetController manages the cpuset.cpus and cpuset.mems files of cgroups
// below root.
type cpusetController struct {
	root string
}

// Name returns Cpuset, the name of the subsystem handled by this controller.
func (c *cpusetController) Name() Name {
	return Cpuset
}

// Path returns path joined onto the controller's root directory.
func (c *cpusetController) Path(path string) string {
	return filepath.Join(c.root, path)
}
// Create makes the cpuset cgroup for path. Parent directories are created
// first and seeded with cpus/mems copied from their own parents, then the
// cgroup directory itself is created and seeded the same way. Finally, any
// non-empty cpus/mems values from resources.CPU are written.
//
// Empty Cpus or Mems strings are skipped so that the values just inherited
// from the parent are not overwritten with nothing.
func (c *cpusetController) Create(path string, resources *specs.LinuxResources) error {
	dir := c.Path(path)
	if err := c.ensureParent(dir, c.root); err != nil {
		return err
	}
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	if err := c.copyIfNeeded(dir, filepath.Dir(dir)); err != nil {
		return err
	}
	if resources.CPU == nil {
		return nil
	}
	for _, t := range []struct {
		name  string
		value string
	}{
		{name: "cpus", value: resources.CPU.Cpus},
		{name: "mems", value: resources.CPU.Mems},
	} {
		// a pointer to the struct field is never nil, so test the value itself
		if t.value == "" {
			continue
		}
		if err := ioutil.WriteFile(
			filepath.Join(dir, fmt.Sprintf("cpuset.%s", t.name)),
			[]byte(t.value),
			defaultFilePerm,
		); err != nil {
			return err
		}
	}
	return nil
}
// getValues reads cpuset.cpus and cpuset.mems from the directory path. A
// missing file is not an error; any other read failure is returned
// immediately together with whatever has been read so far.
func (c *cpusetController) getValues(path string) (cpus []byte, mems []byte, err error) {
	cpus, err = ioutil.ReadFile(filepath.Join(path, "cpuset.cpus"))
	if err != nil && !os.IsNotExist(err) {
		return cpus, mems, err
	}
	mems, err = ioutil.ReadFile(filepath.Join(path, "cpuset.mems"))
	if err != nil && !os.IsNotExist(err) {
		return cpus, mems, err
	}
	return cpus, mems, nil
}
// ensureParent makes sure that the directory current and its ancestors up to
// root are created and populated with the proper cpus and mems files copied
// from their respective parents.
//
// It recurses towards root first, so ancestors are created and seeded before
// their children.
func (c *cpusetController) ensureParent(current, root string) error {
	parent := filepath.Dir(current)
	// NOTE(review): a failing Rel is treated as "nothing to do" rather than as
	// an error — confirm this is intended.
	if _, err := filepath.Rel(root, parent); err != nil {
		return nil
	}
	// Avoid infinite recursion.
	if parent == current {
		return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
	}
	// keep walking up until the parent is the controller root itself
	if cleanPath(parent) != root {
		if err := c.ensureParent(parent, root); err != nil {
			return err
		}
	}
	if err := os.MkdirAll(current, defaultDirPerm); err != nil {
		return err
	}
	return c.copyIfNeeded(current, parent)
}
// copyIfNeeded copies cpuset.cpus and cpuset.mems from the parent directory
// into the current directory for each file whose current content is empty
// (see isEmpty). Both directories are read completely before anything is
// written.
func (c *cpusetController) copyIfNeeded(current, parent string) error {
	currentCpus, currentMems, err := c.getValues(current)
	if err != nil {
		return err
	}
	parentCpus, parentMems, err := c.getValues(parent)
	if err != nil {
		return err
	}
	for _, file := range []struct {
		name      string
		own       []byte
		inherited []byte
	}{
		{name: "cpuset.cpus", own: currentCpus, inherited: parentCpus},
		{name: "cpuset.mems", own: currentMems, inherited: parentMems},
	} {
		if !isEmpty(file.own) {
			continue
		}
		target := filepath.Join(current, file.name)
		if err := ioutil.WriteFile(target, file.inherited, defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// isEmpty reports whether b contains nothing but newline characters; a nil or
// zero-length slice counts as empty.
func isEmpty(b []byte) bool {
	withoutNewlines := bytes.Trim(b, "\n")
	return len(withoutNewlines) == 0
}

74
vendor/github.com/containerd/cgroups/devices.go generated vendored Normal file
View file

@ -0,0 +1,74 @@
package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
const (
	// allowDeviceFile is the file a device rule is written to when the rule
	// allows access.
	allowDeviceFile = "devices.allow"
	// denyDeviceFile is the file a device rule is written to when the rule
	// denies access.
	denyDeviceFile = "devices.deny"
	// wildcard is the major/minor number that is rendered as "*" in a rule.
	wildcard = -1
)
// NewDevices returns a controller for the devices subsystem whose cgroups
// live in the Devices directory below root.
func NewDevices(root string) *devicesController {
	controllerRoot := filepath.Join(root, string(Devices))
	return &devicesController{root: controllerRoot}
}

// devicesController writes device allow/deny rules for cgroups below root.
type devicesController struct {
	root string
}

// Name returns Devices, the name of the subsystem handled by this controller.
func (d *devicesController) Name() Name {
	return Devices
}

// Path returns path joined onto the controller's root directory.
func (d *devicesController) Path(path string) string {
	return filepath.Join(d.root, path)
}
// Create makes the cgroup directory for path and writes each rule of
// resources.Devices, in order, to devices.allow or devices.deny depending on
// the rule's Allow flag. The first failing write aborts the call.
func (d *devicesController) Create(path string, resources *specs.LinuxResources) error {
	dir := d.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	for _, rule := range resources.Devices {
		var target string
		if rule.Allow {
			target = allowDeviceFile
		} else {
			target = denyDeviceFile
		}
		line := []byte(deviceString(rule))
		if err := ioutil.WriteFile(filepath.Join(dir, target), line, defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// Update applies resources to the cgroup at path. It is implemented by
// delegating to Create, so every device rule in resources is written again.
func (d *devicesController) Update(path string, resources *specs.LinuxResources) error {
	err := d.Create(path, resources)
	return err
}
// deviceString renders a device cgroup rule as "<type> <major>:<minor> <access>",
// the line format written to devices.allow and devices.deny. A nil or
// wildcard major/minor is rendered as "*".
//
// The type and access values are formatted directly; formatting their
// addresses would print pointer values instead of the rule text.
// NOTE(review): assumes Type and Access are plain strings in the vendored
// runtime-spec (as in v1.0.0) — confirm against the vendored version.
func deviceString(device specs.LinuxDeviceCgroup) string {
	return fmt.Sprintf("%s %s:%s %s",
		device.Type,
		deviceNumber(device.Major),
		deviceNumber(device.Minor),
		device.Access,
	)
}
// deviceNumber formats a device major or minor number for a device rule. A
// nil pointer and the wildcard value both yield "*"; anything else is printed
// in decimal.
func deviceNumber(number *int64) string {
	if number != nil && *number != wildcard {
		return fmt.Sprint(*number)
	}
	return "*"
}

31
vendor/github.com/containerd/cgroups/errors.go generated vendored Normal file
View file

@ -0,0 +1,31 @@
package cgroups
import (
"errors"
"os"
)
// Sentinel errors returned by this package; callers can compare against them
// directly.
var (
	ErrInvalidPid         = errors.New("cgroups: pid must be greater than 0")
	ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist")
	ErrInvalidFormat      = errors.New("cgroups: parsing file with invalid format failed")
	// ErrFreezerNotSupported is returned by Freeze and Thaw when the cgroup
	// has no freezer subsystem.
	ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup not supported on this system")
	// ErrMemoryNotSupported is returned by OOMEventFD when the cgroup has no
	// memory subsystem.
	ErrMemoryNotSupported = errors.New("cgroups: memory cgroup not supported on this system")
	// ErrCgroupDeleted is recorded on a cgroup once one of its subsystem
	// directories is found to be missing.
	ErrCgroupDeleted            = errors.New("cgroups: cgroup deleted")
	ErrNoCgroupMountDestination = errors.New("cgroups: cannot found cgroup mount destination")
)
// ErrorHandler is a function that handles and acts on errors. It receives an
// error and returns the error that should be used in its place, or nil to
// suppress it.
type ErrorHandler func(err error) error
// IgnoreNotExist is an ErrorHandler that swallows errors reporting that a
// file or directory does not exist (as judged by os.IsNotExist) and passes
// every other error, including nil, through unchanged.
func IgnoreNotExist(err error) error {
	if !os.IsNotExist(err) {
		return err
	}
	return nil
}
// errPassthrough is the identity ErrorHandler: it returns err exactly as it
// was given, nil included.
func errPassthrough(err error) error {
	return err
}

69
vendor/github.com/containerd/cgroups/freezer.go generated vendored Normal file
View file

@ -0,0 +1,69 @@
package cgroups
import (
"io/ioutil"
"path/filepath"
"strings"
"time"
)
// NewFreezer returns a controller for the freezer subsystem whose cgroups
// live in the Freezer directory below root.
func NewFreezer(root string) *freezerController {
	controllerRoot := filepath.Join(root, string(Freezer))
	return &freezerController{root: controllerRoot}
}

// freezerController reads and writes the freezer.state file of cgroups below
// root.
type freezerController struct {
	root string
}

// Name returns Freezer, the name of the subsystem handled by this controller.
func (f *freezerController) Name() Name {
	return Freezer
}

// Path returns path joined onto the controller's root directory.
func (f *freezerController) Path(path string) string {
	return filepath.Join(f.root, path)
}
// Freeze requests the Frozen state for the cgroup at path and then blocks
// until the cgroup reports that state or reading the state fails.
func (f *freezerController) Freeze(path string) error {
	err := f.changeState(path, Frozen)
	if err != nil {
		return err
	}
	return f.waitState(path, Frozen)
}
// Thaw requests the Thawed state for the cgroup at path and then blocks until
// the cgroup reports that state or reading the state fails.
func (f *freezerController) Thaw(path string) error {
	err := f.changeState(path, Thawed)
	if err != nil {
		return err
	}
	return f.waitState(path, Thawed)
}
// changeState writes state, converted to upper case, to the freezer.state
// file of the cgroup at path.
func (f *freezerController) changeState(path string, state State) error {
	target := filepath.Join(f.root, path, "freezer.state")
	payload := []byte(strings.ToUpper(string(state)))
	return ioutil.WriteFile(target, payload, defaultFilePerm)
}
// state reads the freezer.state file of the cgroup at path and returns its
// content with surrounding whitespace removed and converted to lower case. An
// empty State is returned together with the error when the file cannot be
// read.
func (f *freezerController) state(path string) (State, error) {
	raw, err := ioutil.ReadFile(filepath.Join(f.root, path, "freezer.state"))
	if err != nil {
		return "", err
	}
	normalized := strings.ToLower(strings.TrimSpace(string(raw)))
	return State(normalized), nil
}
// waitState polls the freezer state of the cgroup at path once per
// millisecond until it equals state. It only returns early when reading the
// state fails; there is no timeout.
func (f *freezerController) waitState(path string, state State) error {
	for {
		current, err := f.state(path)
		switch {
		case err != nil:
			return err
		case current == state:
			return nil
		}
		time.Sleep(1 * time.Millisecond)
	}
}

4
vendor/github.com/containerd/cgroups/hierarchy.go generated vendored Normal file
View file

@ -0,0 +1,4 @@
package cgroups
// Hierarchy enables both unified and split hierarchies for cgroups. It is a
// function that returns the subsystems making up the hierarchy, or an error
// if they cannot be determined.
type Hierarchy func() ([]Subsystem, error)

92
vendor/github.com/containerd/cgroups/hugetlb.go generated vendored Normal file
View file

@ -0,0 +1,92 @@
package cgroups
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewHugetlb returns a controller for the hugetlb subsystem whose cgroups
// live in the Hugetlb directory below root. The huge page sizes reported by
// hugePageSizes are captured at construction; an error from that lookup is
// returned instead of a controller.
func NewHugetlb(root string) (*hugetlbController, error) {
	pageSizes, err := hugePageSizes()
	if err != nil {
		return nil, err
	}
	controller := &hugetlbController{
		root:  filepath.Join(root, string(Hugetlb)),
		sizes: pageSizes,
	}
	return controller, nil
}

// hugetlbController manages the hugetlb.<size>.* files of cgroups below root
// for the page sizes recorded in sizes.
type hugetlbController struct {
	root  string
	sizes []string
}

// Name returns Hugetlb, the name of the subsystem handled by this controller.
func (h *hugetlbController) Name() Name {
	return Hugetlb
}

// Path returns path joined onto the controller's root directory.
func (h *hugetlbController) Path(path string) string {
	return filepath.Join(h.root, path)
}
// Create makes the cgroup directory for path and writes every entry of
// resources.HugepageLimits to hugetlb.<pagesize>.limit_in_bytes. The first
// failing write aborts the call.
func (h *hugetlbController) Create(path string, resources *specs.LinuxResources) error {
	dir := h.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	for _, hugepage := range resources.HugepageLimits {
		fileName := strings.Join([]string{"hugetlb", hugepage.Pagesize, "limit_in_bytes"}, ".")
		payload := []byte(strconv.FormatUint(hugepage.Limit, 10))
		if err := ioutil.WriteFile(filepath.Join(dir, fileName), payload, defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// Stat replaces stats.Hugetlb with a fresh map and fills it with one entry
// per page size known to the controller. It stops at the first size whose
// counters cannot be read, leaving the map partially filled.
func (h *hugetlbController) Stat(path string, stats *Stats) error {
	stats.Hugetlb = make(map[string]HugetlbStat)
	for i := range h.sizes {
		pageSize := h.sizes[i]
		entry, err := h.readSizeStat(path, pageSize)
		if err != nil {
			return err
		}
		stats.Hugetlb[pageSize] = entry
	}
	return nil
}
// readSizeStat reads the usage_in_bytes, max_usage_in_bytes and failcnt
// counters of the given huge page size for the cgroup at path. On a read
// error the counters gathered so far are returned along with the error.
func (h *hugetlbController) readSizeStat(path, size string) (HugetlbStat, error) {
	var result HugetlbStat
	counters := []struct {
		name  string
		value *uint64
	}{
		{name: "usage_in_bytes", value: &result.Usage},
		{name: "max_usage_in_bytes", value: &result.Max},
		{name: "failcnt", value: &result.Failcnt},
	}
	for _, counter := range counters {
		fileName := strings.Join([]string{"hugetlb", size, counter.name}, ".")
		read, err := readUint(filepath.Join(h.Path(path), fileName))
		if err != nil {
			return result, err
		}
		*counter.value = read
	}
	return result, nil
}

304
vendor/github.com/containerd/cgroups/memory.go generated vendored Normal file
View file

@ -0,0 +1,304 @@
package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewMemory returns a controller for the memory subsystem whose cgroups live
// in the Memory directory below root.
func NewMemory(root string) *memoryController {
	controllerRoot := filepath.Join(root, string(Memory))
	return &memoryController{root: controllerRoot}
}

// memoryController reads and writes the memory.* files of cgroups below root.
type memoryController struct {
	root string
}

// Name returns Memory, the name of the subsystem handled by this controller.
func (m *memoryController) Name() Name {
	return Memory
}

// Path returns path joined onto the controller's root directory.
func (m *memoryController) Path(path string) string {
	return filepath.Join(m.root, path)
}
// Create makes the cgroup directory for path and, when resources.Memory is
// set, applies the memory settings. If a kernel memory limit is requested,
// memory.kmem.limit_in_bytes is first written with 1 and then -1 before the
// real settings are applied; a failure there is passed through checkEBUSY.
func (m *memoryController) Create(path string, resources *specs.LinuxResources) error {
	dir := m.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	mem := resources.Memory
	if mem == nil {
		return nil
	}
	if mem.Kernel != nil {
		// Check if kernel memory is enabled.
		// We have to limit the kernel memory here as it won't be accounted at all
		// until a limit is set on the cgroup, and a limit cannot be set once the
		// cgroup has children or there are already tasks in the cgroup.
		kmemLimit := filepath.Join(dir, "memory.kmem.limit_in_bytes")
		for _, probe := range []int64{1, -1} {
			payload := []byte(strconv.FormatInt(probe, 10))
			if err := ioutil.WriteFile(kmemLimit, payload, defaultFilePerm); err != nil {
				return checkEBUSY(err)
			}
		}
	}
	return m.set(path, getMemorySettings(resources))
}
// Update applies resources.Memory to the cgroup at path; it does nothing when
// resources.Memory is nil.
//
// When both a positive memory limit and a positive swap limit are given, the
// current memory.limit_in_bytes is read and, if the new swap value is larger
// than it, the swap limit is written before the memory limit.
func (m *memoryController) Update(path string, resources *specs.LinuxResources) error {
	mem := resources.Memory
	if mem == nil {
		return nil
	}
	isPositive := func(v *int64) bool {
		return v != nil && *v > 0
	}
	settings := getMemorySettings(resources)
	if isPositive(mem.Limit) && isPositive(mem.Swap) {
		// if the updated swap value is larger than the current memory limit, set the swap changes first
		// and then the memory limit, as swap must always be larger than the current limit
		current, err := readUint(filepath.Join(m.Path(path), "memory.limit_in_bytes"))
		if err != nil {
			return err
		}
		if uint64(*mem.Swap) > current {
			settings[0], settings[1] = settings[1], settings[0]
		}
	}
	return m.set(path, settings)
}
// Stat replaces stats.Memory with a fresh MemoryStat filled from the cgroup's
// memory.stat file and from the usage_in_bytes, max_usage_in_bytes, failcnt
// and limit_in_bytes files of the plain, memsw, kmem and kmem.tcp groups.
// The first file that cannot be read aborts the call.
func (m *memoryController) Stat(path string, stats *Stats) error {
	dir := m.Path(path)
	f, err := os.Open(filepath.Join(dir, "memory.stat"))
	if err != nil {
		return err
	}
	defer f.Close()
	mem := &MemoryStat{}
	stats.Memory = mem
	if err := m.parseStats(f, mem); err != nil {
		return err
	}
	groups := []struct {
		module string
		entry  *MemoryEntry
	}{
		{module: "", entry: &mem.Usage},
		{module: "memsw", entry: &mem.Swap},
		{module: "kmem", entry: &mem.Kernel},
		{module: "kmem.tcp", entry: &mem.KernelTCP},
	}
	for _, group := range groups {
		counters := []struct {
			name  string
			value *uint64
		}{
			{name: "usage_in_bytes", value: &group.entry.Usage},
			{name: "max_usage_in_bytes", value: &group.entry.Max},
			{name: "failcnt", value: &group.entry.Failcnt},
			{name: "limit_in_bytes", value: &group.entry.Limit},
		}
		for _, counter := range counters {
			// file name is memory.<name> or memory.<module>.<name>
			parts := []string{"memory"}
			if group.module != "" {
				parts = append(parts, group.module)
			}
			parts = append(parts, counter.name)
			read, err := readUint(filepath.Join(dir, strings.Join(parts, ".")))
			if err != nil {
				return err
			}
			*counter.value = read
		}
	}
	return nil
}
// OOMEventFD creates an eventfd (with FD_CLOEXEC) and registers it, together
// with the cgroup's memory.oom_control file, in cgroup.event_control. The
// eventfd is returned on success and closed again if registration fails.
func (m *memoryController) OOMEventFD(path string) (uintptr, error) {
	dir := m.Path(path)
	oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
	if err != nil {
		return 0, err
	}
	defer oomControl.Close()
	eventFD, _, errno := unix.RawSyscall(unix.SYS_EVENTFD2, 0, unix.FD_CLOEXEC, 0)
	if errno != 0 {
		return 0, errno
	}
	if err := writeEventFD(dir, oomControl.Fd(), eventFD); err != nil {
		unix.Close(int(eventFD))
		return 0, err
	}
	return eventFD, nil
}
// writeEventFD writes "<efd> <cfd>" to the cgroup.event_control file inside
// root. The file is opened write-only and must already exist.
func writeEventFD(root string, cfd, efd uintptr) error {
	control, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0)
	if err != nil {
		return err
	}
	defer control.Close()
	registration := fmt.Sprintf("%d %d", efd, cfd)
	_, err = control.WriteString(registration)
	return err
}
// parseStats reads "key value" lines from r and copies the known memory.stat
// keys into stat; keys missing from the input leave their field at zero.
//
// A line that cannot be parsed yields an error prefixed with its zero-based
// line index. A scanner failure (for example a read error) is returned as
// well, and stat is left untouched in both cases.
func (m *memoryController) parseStats(r io.Reader, stat *MemoryStat) error {
	var (
		raw  = make(map[string]uint64)
		sc   = bufio.NewScanner(r)
		line int
	)
	for sc.Scan() {
		key, v, err := parseKV(sc.Text())
		if err != nil {
			return fmt.Errorf("%d: %v", line, err)
		}
		raw[key] = v
		line++
	}
	// Scan reports failure only by returning false, so the scanner error has
	// to be inspected once the loop is over; inside the loop it is always nil.
	if err := sc.Err(); err != nil {
		return err
	}
	stat.Cache = raw["cache"]
	stat.RSS = raw["rss"]
	stat.RSSHuge = raw["rss_huge"]
	stat.MappedFile = raw["mapped_file"]
	stat.Dirty = raw["dirty"]
	stat.Writeback = raw["writeback"]
	stat.PgPgIn = raw["pgpgin"]
	stat.PgPgOut = raw["pgpgout"]
	stat.PgFault = raw["pgfault"]
	stat.PgMajFault = raw["pgmajfault"]
	stat.InactiveAnon = raw["inactive_anon"]
	stat.ActiveAnon = raw["active_anon"]
	stat.InactiveFile = raw["inactive_file"]
	stat.ActiveFile = raw["active_file"]
	stat.Unevictable = raw["unevictable"]
	stat.HierarchicalMemoryLimit = raw["hierarchical_memory_limit"]
	stat.HierarchicalSwapLimit = raw["hierarchical_memsw_limit"]
	stat.TotalCache = raw["total_cache"]
	stat.TotalRSS = raw["total_rss"]
	stat.TotalRSSHuge = raw["total_rss_huge"]
	stat.TotalMappedFile = raw["total_mapped_file"]
	stat.TotalDirty = raw["total_dirty"]
	stat.TotalWriteback = raw["total_writeback"]
	stat.TotalPgPgIn = raw["total_pgpgin"]
	stat.TotalPgPgOut = raw["total_pgpgout"]
	stat.TotalPgFault = raw["total_pgfault"]
	stat.TotalPgMajFault = raw["total_pgmajfault"]
	stat.TotalInactiveAnon = raw["total_inactive_anon"]
	stat.TotalActiveAnon = raw["total_active_anon"]
	stat.TotalInactiveFile = raw["total_inactive_file"]
	stat.TotalActiveFile = raw["total_active_file"]
	stat.TotalUnevictable = raw["total_unevictable"]
	return nil
}
// set writes every setting that carries a value to its memory.<name> file in
// the cgroup at path, in slice order. Settings with a nil value are skipped
// and the first failing write aborts the call.
func (m *memoryController) set(path string, settings []memorySettings) error {
	for _, setting := range settings {
		if setting.value == nil {
			continue
		}
		target := filepath.Join(m.Path(path), fmt.Sprintf("memory.%s", setting.name))
		payload := []byte(strconv.FormatInt(*setting.value, 10))
		if err := ioutil.WriteFile(target, payload, defaultFilePerm); err != nil {
			return err
		}
	}
	return nil
}
// memorySettings pairs the suffix of a memory.<name> control file with the
// value to write to it; a nil value means the setting is not applied.
type memorySettings struct {
	name  string
	value *int64
}
// getMemorySettings translates resources.Memory into the list of memory.*
// files to write. resources.Memory must not be nil.
//
// The memory limit is always the first element and the swap limit the second;
// Update relies on these positions when it swaps the two.
func getMemorySettings(resources *specs.LinuxResources) []memorySettings {
	mem := resources.Memory
	// swappiness is converted to the int64 the settings table uses
	var swappiness *int64
	if mem.Swappiness != nil {
		converted := int64(*mem.Swappiness)
		swappiness = &converted
	}
	oomControl := getOomControlValue(mem)
	return []memorySettings{
		{name: "limit_in_bytes", value: mem.Limit},
		{name: "memsw.limit_in_bytes", value: mem.Swap},
		{name: "kmem.limit_in_bytes", value: mem.Kernel},
		{name: "kmem.tcp.limit_in_bytes", value: mem.KernelTCP},
		{name: "oom_control", value: oomControl},
		{name: "swappiness", value: swappiness},
	}
}
// checkEBUSY replaces an *os.PathError that wraps EBUSY with a descriptive
// error about memory.kmem.limit_in_bytes; every other error is returned
// unchanged.
func checkEBUSY(err error) error {
	pathErr, ok := err.(*os.PathError)
	if !ok {
		return err
	}
	errNo, ok := pathErr.Err.(syscall.Errno)
	if !ok || errNo != unix.EBUSY {
		return err
	}
	return fmt.Errorf(
		"failed to set memory.kmem.limit_in_bytes, because either tasks have already joined this cgroup or it has children")
}
// getOomControlValue returns a pointer to 1 when mem asks for the OOM killer
// to be disabled and nil otherwise, in which case nothing is written to
// memory.oom_control.
func getOomControlValue(mem *specs.LinuxMemory) *int64 {
	if mem.DisableOOMKiller == nil || !*mem.DisableOOMKiller {
		return nil
	}
	disabled := int64(1)
	return &disabled
}

23
vendor/github.com/containerd/cgroups/named.go generated vendored Normal file
View file

@ -0,0 +1,23 @@
package cgroups
import "path/filepath"
// NewNamed returns a controller for a named hierarchy. Unlike the other
// constructors, root is stored as given; the name is joined in by Path.
func NewNamed(root string, name Name) *namedController {
	controller := &namedController{}
	controller.root = root
	controller.name = name
	return controller
}

// namedController is a path-only controller for the hierarchy called name
// below root.
type namedController struct {
	root string
	name Name
}

// Name returns the name the controller was created with.
func (n *namedController) Name() Name {
	return n.name
}

// Path returns root, the controller's name and path joined together.
func (n *namedController) Path(path string) string {
	hierarchy := string(n.name)
	return filepath.Join(n.root, hierarchy, path)
}

42
vendor/github.com/containerd/cgroups/net_cls.go generated vendored Normal file
View file

@ -0,0 +1,42 @@
package cgroups
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewNetCls returns a controller for the net_cls subsystem whose cgroups live
// in the NetCLS directory below root.
func NewNetCls(root string) *netclsController {
	controllerRoot := filepath.Join(root, string(NetCLS))
	return &netclsController{root: controllerRoot}
}

// netclsController writes the net_cls.classid file of cgroups below root.
type netclsController struct {
	root string
}

// Name returns NetCLS, the name of the subsystem handled by this controller.
func (n *netclsController) Name() Name {
	return NetCLS
}

// Path returns path joined onto the controller's root directory.
func (n *netclsController) Path(path string) string {
	return filepath.Join(n.root, path)
}
// Create makes the cgroup directory for path and writes net_cls.classid when
// resources.Network carries a class id greater than zero.
func (n *netclsController) Create(path string, resources *specs.LinuxResources) error {
	dir := n.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	network := resources.Network
	if network == nil || network.ClassID == nil || *network.ClassID <= 0 {
		return nil
	}
	classID := strconv.FormatUint(uint64(*network.ClassID), 10)
	return ioutil.WriteFile(filepath.Join(dir, "net_cls.classid"), []byte(classID), defaultFilePerm)
}

50
vendor/github.com/containerd/cgroups/net_prio.go generated vendored Normal file
View file

@ -0,0 +1,50 @@
package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewNetPrio returns a controller for the net_prio subsystem whose cgroups
// live in the NetPrio directory below root.
func NewNetPrio(root string) *netprioController {
	controllerRoot := filepath.Join(root, string(NetPrio))
	return &netprioController{root: controllerRoot}
}

// netprioController writes interface priorities for cgroups below root.
type netprioController struct {
	root string
}

// Name returns NetPrio, the name of the subsystem handled by this controller.
func (n *netprioController) Name() Name {
	return NetPrio
}

// Path returns path joined onto the controller's root directory.
func (n *netprioController) Path(path string) string {
	return filepath.Join(n.root, path)
}
// Create makes the cgroup directory for path and writes one
// "<interface> <priority>" line per entry of resources.Network.Priorities to
// the cgroup's net_prio.ifpriomap file. The first failing write aborts the
// call.
func (n *netprioController) Create(path string, resources *specs.LinuxResources) error {
	if err := os.MkdirAll(n.Path(path), defaultDirPerm); err != nil {
		return err
	}
	if resources.Network != nil {
		for _, prio := range resources.Network.Priorities {
			if err := ioutil.WriteFile(
				// the kernel control file is named net_prio.ifpriomap (with
				// a dot), like every other <subsystem>.<file> entry
				filepath.Join(n.Path(path), "net_prio.ifpriomap"),
				formatPrio(prio.Name, prio.Priority),
				defaultFilePerm,
			); err != nil {
				return err
			}
		}
	}
	return nil
}
// formatPrio renders an interface priority as the bytes of
// "<name> <prio>", with prio in decimal.
func formatPrio(name string, prio uint32) []byte {
	entry := fmt.Sprintf("%s %d", name, prio)
	return []byte(entry)
}

88
vendor/github.com/containerd/cgroups/paths.go generated vendored Normal file
View file

@ -0,0 +1,88 @@
package cgroups
import (
"fmt"
"path/filepath"
"github.com/pkg/errors"
)
// Path resolves the cgroup path to use for the given subsystem, or returns an
// error when no path can be determined for it.
type Path func(subsystem Name) (string, error)
// RootPath is a Path that resolves every subsystem to "/" and never fails.
func RootPath(subsysem Name) (string, error) {
	const root = "/"
	return root, nil
}
// StaticPath returns a Path that resolves every subsystem to the same given
// path and never fails.
func StaticPath(path string) Path {
	fixed := func(_ Name) (string, error) {
		return path, nil
	}
	return fixed
}
// NestedPath will nest the cgroups based on the calling process's cgroup,
// placing its child processes inside its own path with suffix appended.
// If /proc/self/cgroup cannot be parsed, the returned Path always fails with
// that error.
func NestedPath(suffix string) Path {
	own, err := parseCgroupFile("/proc/self/cgroup")
	if err == nil {
		return existingPath(own, suffix)
	}
	return errorPath(err)
}
// PidPath will return the correct cgroup paths for an existing process running
// inside a cgroup. This is commonly used for the Load function to restore an
// existing container. If /proc/<pid>/cgroup cannot be parsed, the returned
// Path always fails with the wrapped parse error.
func PidPath(pid int) Path {
	cgroupFile := fmt.Sprintf("/proc/%d/cgroup", pid)
	found, err := parseCgroupFile(cgroupFile)
	if err == nil {
		return existingPath(found, "")
	}
	return errorPath(errors.Wrapf(err, "parse cgroup file %s", cgroupFile))
}
// existingPath builds a Path from already known per-controller cgroup paths.
//
// Every entry of paths is first rewritten in place to be relative to the
// controller's mount destination (as reported by getCgroupDestination) and
// rooted at "/". The returned Path looks a subsystem up by its plain name,
// then as "name=<subsystem>", and appends suffix when it is not empty.
func existingPath(paths map[string]string, suffix string) Path {
	// localize the paths based on the root mount dest for nested cgroups
	for controller, cgroupPath := range paths {
		dest, err := getCgroupDestination(controller)
		if err != nil {
			return errorPath(err)
		}
		rel, err := filepath.Rel(dest, cgroupPath)
		if err != nil {
			return errorPath(err)
		}
		if rel == "." {
			rel = dest
		}
		paths[controller] = filepath.Join("/", rel)
	}
	return func(name Name) (string, error) {
		root, found := paths[string(name)]
		if !found {
			// named hierarchies are keyed as "name=<subsystem>"
			root, found = paths[fmt.Sprintf("name=%s", name)]
		}
		if !found {
			return "", fmt.Errorf("unable to find %q in controller set", name)
		}
		if suffix == "" {
			return root, nil
		}
		return filepath.Join(root, suffix), nil
	}
}
// subPath returns a Path that resolves through path and appends subName to
// the result; errors from path are passed on with an empty string.
func subPath(path Path, subName string) Path {
	return func(name Name) (string, error) {
		parent, err := path(name)
		if err == nil {
			return filepath.Join(parent, subName), nil
		}
		return "", err
	}
}
// errorPath returns a Path that fails with err for every subsystem.
func errorPath(err error) Path {
	failing := func(_ Name) (string, error) {
		return "", err
	}
	return failing
}

21
vendor/github.com/containerd/cgroups/perf_event.go generated vendored Normal file
View file

@ -0,0 +1,21 @@
package cgroups
import "path/filepath"
// NewPerfEvent returns a controller for the perf_event subsystem whose
// cgroups live in the PerfEvent directory below root.
func NewPerfEvent(root string) *PerfEventController {
	controllerRoot := filepath.Join(root, string(PerfEvent))
	return &PerfEventController{root: controllerRoot}
}

// PerfEventController is a path-only controller for the perf_event subsystem.
type PerfEventController struct {
	root string
}

// Name returns PerfEvent, the name of the subsystem handled by this controller.
func (p *PerfEventController) Name() Name {
	return PerfEvent
}

// Path returns path joined onto the controller's root directory.
func (p *PerfEventController) Path(path string) string {
	return filepath.Join(p.root, path)
}

69
vendor/github.com/containerd/cgroups/pids.go generated vendored Normal file
View file

@ -0,0 +1,69 @@
package cgroups
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// NewPids returns a controller for the pids subsystem whose cgroups live in
// the Pids directory below root.
func NewPids(root string) *pidsController {
	controllerRoot := filepath.Join(root, string(Pids))
	return &pidsController{root: controllerRoot}
}

// pidsController reads and writes the pids.* files of cgroups below root.
type pidsController struct {
	root string
}

// Name returns Pids, the name of the subsystem handled by this controller.
func (p *pidsController) Name() Name {
	return Pids
}

// Path returns path joined onto the controller's root directory.
func (p *pidsController) Path(path string) string {
	return filepath.Join(p.root, path)
}
// Create makes the cgroup directory for path and writes pids.max when
// resources.Pids carries a limit greater than zero.
func (p *pidsController) Create(path string, resources *specs.LinuxResources) error {
	dir := p.Path(path)
	if err := os.MkdirAll(dir, defaultDirPerm); err != nil {
		return err
	}
	pids := resources.Pids
	if pids == nil || pids.Limit <= 0 {
		return nil
	}
	limit := strconv.FormatInt(pids.Limit, 10)
	return ioutil.WriteFile(filepath.Join(dir, "pids.max"), []byte(limit), defaultFilePerm)
}
// Update applies resources to the cgroup at path. It is implemented by
// delegating to Create, so a positive pids limit is written again.
func (p *pidsController) Update(path string, resources *specs.LinuxResources) error {
	err := p.Create(path, resources)
	return err
}
// Stat replaces stats.Pids with the values of pids.current and pids.max for
// the cgroup at path. A pids.max content of "max" is reported as a Limit of
// zero.
func (p *pidsController) Stat(path string, stats *Stats) error {
	dir := p.Path(path)
	current, err := readUint(filepath.Join(dir, "pids.current"))
	if err != nil {
		return err
	}
	rawMax, err := ioutil.ReadFile(filepath.Join(dir, "pids.max"))
	if err != nil {
		return err
	}
	var limit uint64
	if text := strings.TrimSpace(string(rawMax)); text != "max" {
		limit, err = parseUint(text, 10, 64)
		if err != nil {
			return err
		}
	}
	stats.Pids = &PidsStat{
		Current: current,
		Limit:   limit,
	}
	return nil
}

12
vendor/github.com/containerd/cgroups/state.go generated vendored Normal file
View file

@ -0,0 +1,12 @@
package cgroups
// State is a type that represents the state of the current cgroup
type State string

const (
	// Unknown is reported when the state cannot be determined.
	Unknown State = ""
	// Thawed and Frozen are the states that can be requested from the freezer
	// controller; they are upper-cased when written to freezer.state and
	// lower-cased when read back.
	Thawed State = "thawed"
	Frozen State = "frozen"
	// Freezing is never requested by the code visible here.
	// NOTE(review): presumably an intermediate value read back from
	// freezer.state — confirm.
	Freezing State = "freezing"
	// Deleted is reported once the cgroup has been found to no longer exist.
	Deleted State = "deleted"
)

109
vendor/github.com/containerd/cgroups/stats.go generated vendored Normal file
View file

@ -0,0 +1,109 @@
package cgroups
import "sync"
// Stats collects the statistics filled in by the Stat methods of the
// individual subsystem controllers. A field stays nil when its controller has
// not reported anything.
type Stats struct {
	// cpuMu guards the creation of Cpu, which both the cpu and the cpuacct
	// controllers fill in.
	cpuMu sync.Mutex

	// Hugetlb is keyed by huge page size.
	Hugetlb map[string]HugetlbStat
	Pids    *PidsStat
	Cpu     *CpuStat
	Memory  *MemoryStat
	Blkio   *BlkioStat
}
// HugetlbStat holds the counters of one huge page size, read from the
// hugetlb.<size>.usage_in_bytes, max_usage_in_bytes and failcnt files.
type HugetlbStat struct {
	Usage   uint64
	Max     uint64
	Failcnt uint64
}

// PidsStat holds the values of pids.current and pids.max. Limit is zero when
// pids.max contains "max".
type PidsStat struct {
	Current uint64
	Limit   uint64
}
// CpuStat combines the accounting data filled in by the cpuacct controller
// (Usage) with the throttling data filled in by the cpu controller
// (Throttling).
type CpuStat struct {
	Usage      CpuUsage
	Throttling Throttle
}

// CpuUsage holds the values read from the cpuacct.* files: Total from
// cpuacct.usage, PerCpu from cpuacct.usage_percpu, and Kernel/User from
// cpuacct.stat after conversion from clock ticks.
type CpuUsage struct {
	// Units: nanoseconds.
	Total  uint64
	PerCpu []uint64
	Kernel uint64
	User   uint64
}

// Throttle holds the nr_periods, nr_throttled and throttled_time values of
// cpu.stat.
type Throttle struct {
	Periods          uint64
	ThrottledPeriods uint64
	ThrottledTime    uint64
}
// MemoryStat holds the memory statistics of a cgroup. The scalar fields
// mirror the keys of memory.stat (for example Cache is "cache" and TotalRSS
// is "total_rss"); a key missing from the file leaves its field at zero.
type MemoryStat struct {
	Cache                   uint64
	RSS                     uint64
	RSSHuge                 uint64
	MappedFile              uint64
	Dirty                   uint64
	Writeback               uint64
	PgPgIn                  uint64
	PgPgOut                 uint64
	PgFault                 uint64
	PgMajFault              uint64
	InactiveAnon            uint64
	ActiveAnon              uint64
	InactiveFile            uint64
	ActiveFile              uint64
	Unevictable             uint64
	HierarchicalMemoryLimit uint64
	HierarchicalSwapLimit   uint64
	TotalCache              uint64
	TotalRSS                uint64
	TotalRSSHuge            uint64
	TotalMappedFile         uint64
	TotalDirty              uint64
	TotalWriteback          uint64
	TotalPgPgIn             uint64
	TotalPgPgOut            uint64
	TotalPgFault            uint64
	TotalPgMajFault         uint64
	TotalInactiveAnon       uint64
	TotalActiveAnon         uint64
	TotalInactiveFile       uint64
	TotalActiveFile         uint64
	TotalUnevictable        uint64
	// Usage is read from the memory.* files, Swap from memory.memsw.*,
	// Kernel from memory.kmem.* and KernelTCP from memory.kmem.tcp.*.
	Usage     MemoryEntry
	Swap      MemoryEntry
	Kernel    MemoryEntry
	KernelTCP MemoryEntry
}

// MemoryEntry holds the limit_in_bytes, usage_in_bytes, max_usage_in_bytes
// and failcnt values of one memory accounting group.
type MemoryEntry struct {
	Limit   uint64
	Usage   uint64
	Max     uint64
	Failcnt uint64
}
type BlkioStat struct {
IoServiceBytesRecursive []BlkioEntry
IoServicedRecursive []BlkioEntry
IoQueuedRecursive []BlkioEntry
IoServiceTimeRecursive []BlkioEntry
IoWaitTimeRecursive []BlkioEntry
IoMergedRecursive []BlkioEntry
IoTimeRecursive []BlkioEntry
SectorsRecursive []BlkioEntry
}
// BlkioEntry is a single block I/O sample: an operation name, a device
// (by name and by major/minor number) and the associated value.
// NOTE(review): the set of Op strings and the unit of Value depend on which
// BlkioStat list the entry belongs to — confirm against the blkio subsystem.
type BlkioEntry struct {
Op string
Device string
Major uint64
Minor uint64
Value uint64
}

94
vendor/github.com/containerd/cgroups/subsystem.go generated vendored Normal file
View file

@ -0,0 +1,94 @@
package cgroups
import (
"fmt"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// Name is a typed name for a cgroup subsystem
type Name string
// Names of the subsystems known to this package. Subsystems returns all of
// them, except that Devices is left out when running in a user namespace.
const (
Devices Name = "devices"
Hugetlb Name = "hugetlb"
Freezer Name = "freezer"
Pids Name = "pids"
NetCLS Name = "net_cls"
NetPrio Name = "net_prio"
PerfEvent Name = "perf_event"
Cpuset Name = "cpuset"
Cpu Name = "cpu"
Cpuacct Name = "cpuacct"
Memory Name = "memory"
Blkio Name = "blkio"
)
// Subsystems returns the names of the default cgroup subsystems that are
// available on most Linux systems. Devices is appended last, and only when
// the process is not running inside a user namespace.
func Subsystems() []Name {
	names := []Name{
		Hugetlb,
		Freezer,
		Pids,
		NetCLS,
		NetPrio,
		PerfEvent,
		Cpuset,
		Cpu,
		Cpuacct,
		Memory,
		Blkio,
	}
	if isUserNS {
		return names
	}
	return append(names, Devices)
}
// Subsystem is the minimal interface every cgroup subsystem implements: it
// reports its Name. Optional capabilities are expressed by the unexported
// interfaces below, which all embed Subsystem.
type Subsystem interface {
Name() Name
}
// pather is a Subsystem that can translate a cgroup path into a string via
// Path; initializeSubsystem passes that result to os.MkdirAll, and V1 passes
// it to os.Lstat.
type pather interface {
Subsystem
Path(path string) string
}
// creator is a Subsystem with its own Create step. initializeSubsystem calls
// Create with the resolved path and the resources, in preference to the
// default directory creation used for a plain pather.
type creator interface {
Subsystem
Create(path string, resources *specs.LinuxResources) error
}
// deleter is a Subsystem that can delete the cgroup at the given path.
type deleter interface {
Subsystem
Delete(path string) error
}
// stater is a Subsystem that can report statistics for the cgroup at the
// given path by writing into the supplied Stats value.
type stater interface {
Subsystem
Stat(path string, stats *Stats) error
}
// updater is a Subsystem that can apply the given resources to the existing
// cgroup at the given path.
type updater interface {
Subsystem
Update(path string, resources *specs.LinuxResources) error
}
// SingleSubsystem wraps baseHierarchy in a Hierarchy that yields only the
// subsystem whose Name equals subsystem. The returned Hierarchy propagates
// any error from baseHierarchy and reports an error when no subsystem matches.
func SingleSubsystem(baseHierarchy Hierarchy, subsystem Name) Hierarchy {
	return func() ([]Subsystem, error) {
		all, err := baseHierarchy()
		if err != nil {
			return nil, err
		}
		for i := range all {
			if all[i].Name() != subsystem {
				continue
			}
			// first match wins
			return []Subsystem{all[i]}, nil
		}
		return nil, fmt.Errorf("unable to find subsystem %s", subsystem)
	}
}

101
vendor/github.com/containerd/cgroups/systemd.go generated vendored Normal file
View file

@ -0,0 +1,101 @@
package cgroups
import (
"fmt"
"path/filepath"
"strings"
"sync"
systemdDbus "github.com/coreos/go-systemd/dbus"
"github.com/godbus/dbus"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
const (
// SystemdDbus is the Name reported by SystemdController.
SystemdDbus Name = "systemd"
// defaultSlice is the slice that Slice falls back to when it is given an
// empty slice name.
defaultSlice = "system.slice"
)
// Systemd returns the systemd controller followed by the default subsystems
// rooted at the v1 cgroup mount point. Any failure to locate the mount point,
// build the defaults or create the controller is returned as is.
func Systemd() ([]Subsystem, error) {
	root, err := v1MountPoint()
	if err != nil {
		return nil, err
	}
	defaultSubsystems, err := defaults(root)
	if err != nil {
		return nil, err
	}
	controller, err := NewSystemd(root)
	if err != nil {
		return nil, err
	}
	// make sure the systemd controller is added first
	all := make([]Subsystem, 0, len(defaultSubsystems)+1)
	all = append(all, controller)
	all = append(all, defaultSubsystems...)
	return all, nil
}
// Slice returns a Path that resolves every subsystem to
// "<slice>/<name>.slice". An empty slice falls back to defaultSlice.
func Slice(slice, name string) Path {
	parent := slice
	if parent == "" {
		parent = defaultSlice
	}
	return func(Name) (string, error) {
		unit := unitName(name)
		return filepath.Join(parent, unit), nil
	}
}
// NewSystemd opens a connection through systemdDbus.New and returns a
// SystemdController bound to it and to root. The connection error, if any,
// is returned unchanged.
func NewSystemd(root string) (*SystemdController, error) {
	conn, err := systemdDbus.New()
	if err != nil {
		return nil, err
	}
	controller := &SystemdController{
		conn: conn,
		root: root,
	}
	return controller, nil
}
// SystemdController is the Subsystem that creates and deletes cgroups as
// systemd units over the connection opened by NewSystemd.
type SystemdController struct {
// mu is not referenced by the methods visible in this file.
// NOTE(review): confirm whether it is still needed.
mu sync.Mutex
// conn is the systemd D-Bus connection used by Create and Delete.
conn *systemdDbus.Conn
// root is the value handed to NewSystemd; Systemd passes the v1 mount point.
root string
}
// Name returns SystemdDbus, the name under which this controller is known.
func (s *SystemdController) Name() Name {
	const name = SystemdDbus
	return name
}
// Create starts a transient systemd unit for path. The last path element is
// used as the unit name and the leading part as the slice the unit wants.
// The unit is started in "replace" mode with delegation and with memory, CPU
// and block I/O accounting enabled. resources is accepted to satisfy the
// creator interface but is not used here.
func (s *SystemdController) Create(path string, resources *specs.LinuxResources) error {
	slice, name := splitName(path)
	description := fmt.Sprintf("cgroup %s", name)
	properties := []systemdDbus.Property{
		systemdDbus.PropDescription(description),
		systemdDbus.PropWants(slice),
		newProperty("DefaultDependencies", false),
		newProperty("Delegate", true),
		newProperty("MemoryAccounting", true),
		newProperty("CPUAccounting", true),
		newProperty("BlockIOAccounting", true),
	}
	if _, err := s.conn.StartTransientUnit(name, "replace", properties, nil); err != nil {
		return err
	}
	return nil
}
// Delete stops, in "replace" mode, the systemd unit named by the last element
// of path and returns the error reported by the stop request, if any.
func (s *SystemdController) Delete(path string) error {
	_, name := splitName(path)
	if _, err := s.conn.StopUnit(name, "replace", nil); err != nil {
		return err
	}
	return nil
}
// newProperty builds a systemd D-Bus property called name whose value is
// units wrapped in a D-Bus variant.
func newProperty(name string, units interface{}) systemdDbus.Property {
	var property systemdDbus.Property
	property.Name = name
	property.Value = dbus.MakeVariant(units)
	return property
}
// unitName returns name with the ".slice" suffix appended.
func unitName(name string) string {
	const format = "%s.slice"
	return fmt.Sprintf(format, name)
}
// splitName splits path at its final separator into the leading slice (with
// one trailing "/" removed) and the trailing unit name.
func splitName(path string) (slice string, unit string) {
	dir, file := filepath.Split(path)
	dir = strings.TrimSuffix(dir, "/")
	return dir, file
}

10
vendor/github.com/containerd/cgroups/ticks.go generated vendored Normal file
View file

@ -0,0 +1,10 @@
package cgroups
// getClockTicks returns the number of clock ticks per second.
func getClockTicks() uint64 {
	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
	// on Linux it's a constant which is safe to be hard coded,
	// so we can avoid using cgo here.
	// See https://github.com/containerd/cgroups/pull/12 for
	// more details.
	const clockTicks uint64 = 100
	return clockTicks
}

280
vendor/github.com/containerd/cgroups/utils.go generated vendored Normal file
View file

@ -0,0 +1,280 @@
package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"time"
units "github.com/docker/go-units"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// isUserNS caches the result of runningInUserNS; being a package-level
// variable, it is evaluated once during package initialisation.
var isUserNS = runningInUserNS()
// runningInUserNS detects whether we are currently running in a user namespace.
// It inspects the first line of /proc/self/uid_map and reports false when the
// file cannot be opened or read.
// Copied from github.com/lxc/lxd/shared/util.go
func runningInUserNS() bool {
	uidMap, err := os.Open("/proc/self/uid_map")
	if err != nil {
		// This kernel-provided file only exists if user namespaces are supported
		return false
	}
	defer uidMap.Close()

	first, _, err := bufio.NewReader(uidMap).ReadLine()
	if err != nil {
		return false
	}

	var insideID, outsideID, length int64
	// The scan result is deliberately ignored: values that fail to parse stay
	// zero, which does not match the full range checked below.
	fmt.Sscanf(string(first), "%d %d %d", &insideID, &outsideID, &length)

	/*
	 * We assume we are in the initial user namespace if we have a full
	 * range - 4294967295 uids starting at uid 0.
	 */
	initialNS := insideID == 0 && outsideID == 0 && length == 4294967295
	return !initialNS
}
// defaults returns all known groups rooted at root. The devices group is
// included only outside a user namespace, and the hugetlb group only when it
// could be created; a hugetlb error other than "not exist" aborts the call.
func defaults(root string) ([]Subsystem, error) {
	hugetlb, hugetlbErr := NewHugetlb(root)
	if hugetlbErr != nil && !os.IsNotExist(hugetlbErr) {
		return nil, hugetlbErr
	}
	subsystems := []Subsystem{
		NewNamed(root, "systemd"),
		NewFreezer(root),
		NewPids(root),
		NewNetCls(root),
		NewNetPrio(root),
		NewPerfEvent(root),
		NewCputset(root),
		NewCpu(root),
		NewCpuacct(root),
		NewMemory(root),
		NewBlkio(root),
	}
	// only add the devices cgroup if we are not in a user namespace
	// because modifications are not allowed
	if !isUserNS {
		subsystems = append(subsystems, NewDevices(root))
	}
	// add the hugetlb cgroup if error wasn't due to missing hugetlb
	// cgroup support on the host
	if hugetlbErr == nil {
		subsystems = append(subsystems, hugetlb)
	}
	return subsystems, nil
}
// remove deletes the cgroup at path with os.RemoveAll, retrying on any error
// (for example EAGAIN or EBUSY while the kernel is still releasing the group).
// Up to five attempts are made; the pause before each retry starts at 10ms
// and doubles every time. When every attempt fails, the returned error names
// the path and carries the error of the last attempt so the cause is not lost.
func remove(path string) error {
	var (
		err   error
		delay = 10 * time.Millisecond
	)
	for i := 0; i < 5; i++ {
		if i != 0 {
			time.Sleep(delay)
			delay *= 2
		}
		if err = os.RemoveAll(path); err == nil {
			return nil
		}
	}
	return fmt.Errorf("cgroups: unable to remove path %q: %v", path, err)
}
// readPids reads all the pids listed in the cgroupProcs file under path and
// returns one Process per non-empty line, tagged with subsystem and path.
// It fails when the file cannot be opened, when a line is not a decimal
// integer, or when reading the file fails part-way through.
func readPids(path string, subsystem Name) ([]Process, error) {
	f, err := os.Open(filepath.Join(path, cgroupProcs))
	if err != nil {
		return nil, err
	}
	defer f.Close()
	var (
		out []Process
		s   = bufio.NewScanner(f)
	)
	for s.Scan() {
		t := s.Text()
		if t == "" {
			continue
		}
		pid, err := strconv.Atoi(t)
		if err != nil {
			return nil, err
		}
		out = append(out, Process{
			Pid:       pid,
			Subsystem: subsystem,
			Path:      path,
		})
	}
	// Scan returns false on a read error as well as on EOF; without this
	// check a truncated pid list would be reported as a success.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// hugePageSizes lists the entries of /sys/kernel/mm/hugepages and returns the
// size encoded after the "-" in each entry name, re-formatted with units.CustomSize
// (base 1024, suffixes B, kB, MB, ...). Entries whose name contains no "-"
// are skipped. A failure to read the directory or to parse a size is returned.
func hugePageSizes() ([]string, error) {
	var (
		pageSizes []string
		sizeList  = []string{"B", "kB", "MB", "GB", "TB", "PB"}
	)
	files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
	if err != nil {
		return nil, err
	}
	for _, st := range files {
		nameArray := strings.Split(st.Name(), "-")
		if len(nameArray) < 2 {
			// No "<prefix>-<size>" shape: there is no size to parse, and
			// indexing nameArray[1] would panic.
			continue
		}
		pageSize, err := units.RAMInBytes(nameArray[1])
		if err != nil {
			return nil, err
		}
		pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
	}
	return pageSizes, nil
}
// readUint reads the file at path, trims surrounding whitespace and parses
// the content as a base-10, 64-bit value with parseUint.
func readUint(path string) (uint64, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}
	text := strings.TrimSpace(string(raw))
	return parseUint(text, 10, 64)
}
// parseUint parses s as an unsigned integer in the given base and bit size.
// Input that is not a valid unsigned number but does denote a negative
// integer is clamped to 0 with a nil error, including negative values that
// overflow the signed range. Any other failure returns the error from
// strconv.ParseUint.
func parseUint(s string, base, bitSize int) (uint64, error) {
	parsed, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return parsed, nil
	}
	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	switch {
	case signedErr == nil && signed < 0:
		// 1. Handle negative values greater than MinInt64
		return 0, nil
	case signedErr != nil && signed < 0 &&
		signedErr.(*strconv.NumError).Err == strconv.ErrRange:
		// 2. Handle negative values lesser than MinInt64
		return 0, nil
	}
	return 0, err
}
func parseKV(raw string) (string, uint64, error) {
parts := strings.Fields(raw)
switch len(parts) {
case 2:
v, err := parseUint(parts[1], 10, 64)
if err != nil {
return "", 0, err
}
return parts[0], v, nil
default:
return "", 0, ErrInvalidFormat
}
}
// parseCgroupFile opens the file at path and parses it with
// parseCgroupFromReader; the file is closed before returning.
func parseCgroupFile(path string) (map[string]string, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	cgroups, err := parseCgroupFromReader(file)
	return cgroups, err
}
// parseCgroupFromReader parses lines of the form "<id>:<subsystems>:<path>"
// read from r and returns a map from each comma-separated, non-empty
// subsystem name to the path of its line. A line with fewer than three
// ":"-separated parts is rejected, and a failure while reading r (including
// an over-long line) is returned instead of a partial map.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	var (
		cgroups = make(map[string]string)
		s       = bufio.NewScanner(r)
	)
	for s.Scan() {
		var (
			text  = s.Text()
			parts = strings.SplitN(text, ":", 3)
		)
		if len(parts) < 3 {
			return nil, fmt.Errorf("invalid cgroup entry: %q", text)
		}
		for _, subs := range strings.Split(parts[1], ",") {
			if subs != "" {
				cgroups[subs] = parts[2]
			}
		}
	}
	// Err is only meaningful once Scan has returned false, so it has to be
	// checked here rather than inside the loop.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return cgroups, nil
}
// getCgroupDestination scans /proc/self/mountinfo for the first line whose
// last whitespace-separated field contains subsystem as one of its
// comma-separated options, and returns the fourth field of that line.
// Lines with fewer than four fields are skipped. It returns
// ErrNoCgroupMountDestination when no line matches, and the read error when
// the file cannot be opened or scanned.
func getCgroupDestination(subsystem string) (string, error) {
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", err
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for s.Scan() {
		fields := strings.Fields(s.Text())
		// fields[3] is returned below; shorter (or blank) lines cannot be a
		// usable entry and would otherwise cause an index-out-of-range panic.
		if len(fields) < 4 {
			continue
		}
		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
			if opt == subsystem {
				return fields[3], nil
			}
		}
	}
	// Err is only meaningful once Scan has returned false.
	if err := s.Err(); err != nil {
		return "", err
	}
	return "", ErrNoCgroupMountDestination
}
// pathers returns, in their original order, the given subsystems that also
// implement the pather interface.
func pathers(subystems []Subsystem) []pather {
	var out []pather
	for i := range subystems {
		p, ok := subystems[i].(pather)
		if !ok {
			continue
		}
		out = append(out, p)
	}
	return out
}
// initializeSubsystem creates the cgroup for subsystem s. The cgroup path is
// resolved by calling path with the subsystem's name. A subsystem that
// implements creator is asked to Create the group itself; otherwise, one that
// implements pather gets a directory created with os.MkdirAll. A subsystem
// implementing neither is left untouched.
func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
	switch sub := s.(type) {
	case creator:
		p, err := path(s.Name())
		if err != nil {
			return err
		}
		return sub.Create(p, resources)
	case pather:
		p, err := path(s.Name())
		if err != nil {
			return err
		}
		// do the default create if the group does not have a custom one
		return os.MkdirAll(sub.Path(p), defaultDirPerm)
	}
	return nil
}
// cleanPath normalises path with filepath.Clean. An empty path stays empty.
// A relative path is additionally resolved as if it were anchored at the
// root, so that leading ".." elements cannot climb above it, and is then
// returned in relative form.
func cleanPath(path string) string {
	if path == "" {
		return ""
	}
	cleaned := filepath.Clean(path)
	if filepath.IsAbs(cleaned) {
		return filepath.Clean(cleaned)
	}
	root := string(os.PathSeparator)
	// A failing Rel leaves rel empty, which the final Clean turns into ".".
	rel, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
	return filepath.Clean(rel)
}

65
vendor/github.com/containerd/cgroups/v1.go generated vendored Normal file
View file

@ -0,0 +1,65 @@
package cgroups
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
)
// V1 returns the default groups found under the v1 cgroup mount point as a
// single hierarchy. Only groups that implement pather and whose root path
// can be stat'ed are returned.
func V1() ([]Subsystem, error) {
	root, err := v1MountPoint()
	if err != nil {
		return nil, err
	}
	subsystems, err := defaults(root)
	if err != nil {
		return nil, err
	}
	var enabled []Subsystem
	for _, p := range pathers(subsystems) {
		// check and remove the default groups that do not exist
		if _, statErr := os.Lstat(p.Path("/")); statErr != nil {
			continue
		}
		enabled = append(enabled, p)
	}
	return enabled, nil
}
// v1MountPoint returns the mount point where the cgroup
// mountpoints are mounted in a single hierarchy.
//
// It scans /proc/self/mountinfo for the first entry whose filesystem type
// (the first field after the " - " separator) is "cgroup" and returns the
// parent directory of that entry's fifth space-separated field. Malformed
// lines and read failures are reported as errors; ErrMountPointNotExist is
// returned when no cgroup entry is found.
func v1MountPoint() (string, error) {
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", err
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		var (
			text   = scanner.Text()
			fields = strings.Split(text, " ")
			// safe as mountinfo encodes mountpoints with spaces as \040.
			index = strings.Index(text, " - ")
		)
		// this is an error as we can't detect if the mount is for "cgroup";
		// a missing separator must not be used as a slice offset.
		if index < 0 {
			return "", fmt.Errorf("Found no fields post '-' in %q", text)
		}
		postSeparatorFields := strings.Fields(text[index+3:])
		numPostFields := len(postSeparatorFields)
		if numPostFields == 0 {
			return "", fmt.Errorf("Found no fields post '-' in %q", text)
		}
		if postSeparatorFields[0] == "cgroup" {
			// check that the mount is properly formatted.
			if numPostFields < 3 {
				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
			}
			// fields[4] is read below; guard against truncated entries.
			if len(fields) < 5 {
				return "", fmt.Errorf("Error found less than 5 fields in %q", text)
			}
			return filepath.Dir(fields[4]), nil
		}
	}
	// Err is only meaningful once Scan has returned false.
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", ErrMountPointNotExist
}