update vendor

Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
This commit is contained in:
Jess Frazelle 2018-03-19 21:36:34 -04:00
parent 7a437ada25
commit 639756e8c6
4300 changed files with 824810 additions and 9292 deletions

View file

@ -1,191 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2014 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,328 @@
# libcontainer
[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls.
It allows you to manage the lifecycle of the container performing additional operations
after the container is created.
#### Container
A container is a self contained execution environment that shares the kernel of the
host system and which is (optionally) isolated from other containers in the system.
#### Using libcontainer
Because containers are spawned in a two step process you will need a binary that
will be executed as the init process for the container. In libcontainer, we use
the current binary (/proc/self/exe) to be executed as the init process, and use
arg "init", we call the first step process "bootstrap", so you always need a "init"
function as the entry of "bootstrap".
In addition to the go init function the early stage bootstrap is handled by importing
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
```go
import (
_ "github.com/opencontainers/runc/libcontainer/nsenter"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
logrus.Fatal(err)
}
panic("--this line should have never been executed, congratulations--")
}
}
```
Then to create a container you first have to initialize an instance of a factory
that will handle the creation and initialization for a container.
```go
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
if err != nil {
logrus.Fatal(err)
return
}
```
Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
config := &configs.Config{
Rootfs: "/your/path/to/rootfs",
Capabilities: &configs.Capabilities{
Bounding: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Effective: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Inheritable: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Permitted: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Ambient: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWUSER},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
},
},
MaskPaths: []string{
"/proc/kcore",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{
Source: "proc",
Destination: "/proc",
Device: "proc",
Flags: defaultMountFlags,
},
{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | unix.MS_RDONLY,
},
},
UidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
GidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: unix.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},
},
}
```
Once you have the configuration populated you can create a container:
```go
container, err := factory.Create("container-id", config)
if err != nil {
logrus.Fatal(err)
return
}
```
To spawn bash as the initial process inside the container and have the
processes pid returned in order to wait, signal, or kill the process:
```go
process := &libcontainer.Process{
Args: []string{"/bin/bash"},
Env: []string{"PATH=/bin"},
User: "daemon",
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
err := container.Run(process)
if err != nil {
container.Destroy()
logrus.Fatal(err)
return
}
// wait for the process to finish.
_, err := process.Wait()
if err != nil {
logrus.Fatal(err)
}
// destroy the container.
container.Destroy()
```
Additional ways to interact with a running container are:
```go
// return all the pids for all processes running inside the container.
processes, err := container.Processes()
// get detailed cpu, memory, io, and network statistics for the container and
// it's processes.
stats, err := container.Stats()
// pause all processes inside the container.
container.Pause()
// resume all paused processes.
container.Resume()
// send signal to container's init process.
container.Signal(signal)
// update container resource constraints.
container.Set(config)
// get current status of the container.
status, err := container.Status()
// get current container's state information.
state, err := container.State()
```
#### Checkpoint & Restore
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
This let's you save the state of a process running inside a container to disk, and then restore
that state into a new process, on the same machine or on another machine.
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
If you don't already have `criu` installed, you can build it from source, following the
[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
generated when building libcontainer with docker.
## Copyright and license
Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license.
Docs released under Creative commons.

View file

@ -0,0 +1,418 @@
## Container Specification - v1
This is the standard configuration for version 1 containers. It includes
namespaces, standard filesystem setup, a default Linux capability set, and
information about resource reservations. It also has information about any
populated environment settings for the processes running inside a container.
Along with the configuration of how a container is created the standard also
discusses actions that can be performed on a container to manage and inspect
information about the processes running inside.
The v1 profile is meant to be able to accommodate the majority of applications
with a strong security configuration.
### System Requirements and Compatibility
Minimum requirements:
* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches)
* Mounted cgroups with each subsystem in its own hierarchy
### Namespaces
| Flag | Enabled |
| ------------ | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 1 |
Namespaces are created for the container via the `clone` syscall.
### Filesystem
A root filesystem must be provided to a container for execution. The container
will use this root filesystem (rootfs) to jail and spawn processes inside where
the binaries and system libraries are local to that directory. Any binaries
to be executed must be contained within this rootfs.
Mounts that happen inside the container are automatically cleaned up when the
container exits as the mount namespace is destroyed and the kernel will
unmount all the mounts that were setup within that namespace.
For a container to execute properly there are certain filesystems that
are required to be mounted within the rootfs that the runtime will setup.
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
After a container's filesystems are mounted within the newly created
mount namespace `/dev` will need to be populated with a set of device nodes.
It is expected that a rootfs does not need to have any device nodes specified
for `/dev` within the rootfs as the container will setup the correct devices
that are required for executing a container's process.
| Path | Mode | Access |
| ------------ | ---- | ---------- |
| /dev/null | 0666 | rwm |
| /dev/zero | 0666 | rwm |
| /dev/full | 0666 | rwm |
| /dev/tty | 0666 | rwm |
| /dev/random | 0666 | rwm |
| /dev/urandom | 0666 | rwm |
**ptmx**
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
the container.
The use of a pseudo TTY is optional within a container and it should support both.
If a pseudo is provided to the container `/dev/console` will need to be
setup by binding the console in `/dev/` after it has been populated and mounted
in tmpfs.
| Source | Destination | UID GID | Mode | Type |
| --------------- | ------------ | ------- | ---- | ---- |
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
After `/dev/null` has been setup we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container we close and `dup2` the `/dev/null`
that is local to the container's rootfs.
After the container has `/proc` mounted a few standard symlinks are setup
within `/dev/` for the io.
| Source | Destination |
| --------------- | ----------- |
| /proc/self/fd | /dev/fd |
| /proc/self/fd/0 | /dev/stdin |
| /proc/self/fd/1 | /dev/stdout |
| /proc/self/fd/2 | /dev/stderr |
A `pivot_root` is used to change the root for the process, effectively
jailing the process inside the rootfs.
```c
put_old = mkdir(...);
pivot_root(rootfs, put_old);
chdir("/");
unmount(put_old, MS_DETACH);
rmdir(put_old);
```
For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
```c
mount(rootfs, "/", NULL, MS_MOVE, NULL);
chroot(".");
chdir("/");
```
The `umask` is set back to `0022` after the filesystem setup has been completed.
### Resources
Cgroups are used to handle resource allocation for containers. This includes
system resources like cpu, memory, and device access.
| Subsystem | Enabled |
| ---------- | ------- |
| devices | 1 |
| memory | 1 |
| cpu | 1 |
| cpuacct | 1 |
| cpuset | 1 |
| blkio | 1 |
| perf_event | 1 |
| freezer | 1 |
| hugetlb | 1 |
| pids | 1 |
All cgroup subsystem are joined so that statistics can be collected from
each of the subsystems. Freezer does not expose any stats but is joined
so that containers can be paused and resumed.
The parent process of the container's init must place the init pid inside
the correct cgroups before the initialization begins. This is done so
that no processes or threads escape the cgroups. This sync is
done via a pipe ( specified in the runtime section below ) that the container's
init process will block waiting for the parent to finish setup.
### IntelRdt
Intel platforms with new Xeon CPU support Intel Resource Director Technology
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
currently supports L3 cache resource allocation.
This feature provides a way for the software to restrict cache allocation to a
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each CLOS
has a capacity bitmask (CBM).
It can be used to handle L3 cache resource allocation for containers if
hardware and kernel support Intel RDT/CAT.
In Linux 4.10 kernel or newer, the interface is defined and exposed via
"resource control" filesystem, which is a "cgroup-like" interface.
Comparing with cgroups, it has similar process management lifecycle and
interfaces in a container. But unlike cgroups' hierarchy, it has single level
filesystem layout.
Intel RDT "resource control" filesystem hierarchy:
```
mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
| |-- L3
| |-- cbm_mask
| |-- min_cbm_bits
| |-- num_closids
|-- cpus
|-- schemata
|-- tasks
|-- <container_id>
|-- cpus
|-- schemata
|-- tasks
```
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
resource constraints.
The file `tasks` has a list of tasks that belongs to this group (e.g.,
<container_id>" group). Tasks can be added to a group by writing the task ID
to the "tasks" file (which will automatically remove them from the previous
group to which they belonged). New tasks created by fork(2) and clone(2) are
added to the same group as their parent. If a pid is not in any sub group, it
is in root group.
The file `schemata` has allocation masks/values for L3 cache on each socket,
which contains L3 cache id and capacity bitmask (CBM).
```
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
```
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
be set is less than the max bit. The max bits in the CBM is varied among
supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
layout, the CBM in a group should be a subset of the CBM in root. Kernel will
check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
For more information about Intel RDT/CAT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
An example for runc:
```
Consider a two-socket machine with two L3 caches where the default CBM is
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
inside the container only have access to the "upper" 80% of L3 cache id 0 and
the "lower" 50% L3 cache id 1:
"linux": {
"intelRdt": {
"l3CacheSchema": "L3:0=ffff0;1=3ff"
}
}
```
### Security
The standard set of Linux capabilities that are set in a container
provide a good default for security and flexibility for the applications.
| Capability | Enabled |
| -------------------- | ------- |
| CAP_NET_RAW | 1 |
| CAP_NET_BIND_SERVICE | 1 |
| CAP_AUDIT_READ | 1 |
| CAP_AUDIT_WRITE | 1 |
| CAP_DAC_OVERRIDE | 1 |
| CAP_SETFCAP | 1 |
| CAP_SETPCAP | 1 |
| CAP_SETGID | 1 |
| CAP_SETUID | 1 |
| CAP_MKNOD | 1 |
| CAP_CHOWN | 1 |
| CAP_FOWNER | 1 |
| CAP_FSETID | 1 |
| CAP_KILL | 1 |
| CAP_SYS_CHROOT | 1 |
| CAP_NET_BROADCAST | 0 |
| CAP_SYS_MODULE | 0 |
| CAP_SYS_RAWIO | 0 |
| CAP_SYS_PACCT | 0 |
| CAP_SYS_ADMIN | 0 |
| CAP_SYS_NICE | 0 |
| CAP_SYS_RESOURCE | 0 |
| CAP_SYS_TIME | 0 |
| CAP_SYS_TTY_CONFIG | 0 |
| CAP_AUDIT_CONTROL | 0 |
| CAP_MAC_OVERRIDE | 0 |
| CAP_MAC_ADMIN | 0 |
| CAP_NET_ADMIN | 0 |
| CAP_SYSLOG | 0 |
| CAP_DAC_READ_SEARCH | 0 |
| CAP_LINUX_IMMUTABLE | 0 |
| CAP_IPC_LOCK | 0 |
| CAP_IPC_OWNER | 0 |
| CAP_SYS_PTRACE | 0 |
| CAP_SYS_BOOT | 0 |
| CAP_LEASE | 0 |
| CAP_WAKE_ALARM | 0 |
| CAP_BLOCK_SUSPEND | 0 |
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
the containers. A container should support setting an apparmor profile or
selinux process and mount labels if provided in the configuration.
Standard apparmor profile:
```c
#include <tunables/global>
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
#include <abstractions/base>
network,
capability,
file,
umount,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/fs/cg[^r]*/** wklx,
deny /sys/firmware/efi/efivars/** rwklx,
deny /sys/kernel/security/** rwklx,
}
```
*TODO: seccomp work is being done to find a good default config*
### Runtime and Init Process
During container creation the parent process needs to talk to the container's init
process and have a form of synchronization. This is accomplished by creating
a pipe that is passed to the container's init. When the init process first spawns
it will block on its side of the pipe until the parent closes its side. This
allows the parent to have time to set the new process inside a cgroup hierarchy
and/or write any uid/gid mappings required for user namespaces.
The pipe is passed to the init process via FD 3.
The application consuming libcontainer should be compiled statically. libcontainer
does not define any init process and the arguments provided are used to `exec` the
process inside the application. There should be no long running init within the
container spec.
If a pseudo tty is provided to a container it will open and `dup2` the console
as the container's STDIN, STDOUT, STDERR as well as mounting the console
as `/dev/console`.
An extra set of mounts are provided to a container and setup for use. A container's
rootfs can contain some non portable files inside that can cause side effects during
execution of a process. These files are usually created and populated with the container
specific information via the runtime.
**Extra runtime files:**
* /etc/hosts
* /etc/resolv.conf
* /etc/hostname
* /etc/localtime
#### Defaults
There are a few defaults that can be overridden by users, but in their omission
these apply to processes within a container.
| Type | Value |
| ------------------- | ------------------------------ |
| Parent Death Signal | SIGKILL |
| UID | 0 |
| GID | 0 |
| GROUPS | 0, NULL |
| CWD | "/" |
| $HOME | Current user's home dir or "/" |
| Readonly rootfs | false |
| Pseudo TTY | false |
## Actions
After a container is created there is a standard set of actions that can
be done to the container. These actions are part of the public API for
a container.
| Action | Description |
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Waits on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |
| Signal Process | Send a signal to any of the container's processes |
| Pause | Pause all processes inside the container |
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |
| Set | Setup configs of the container after it's created |
### Execute a new process inside of a running container
User can execute a new process inside of a running container. Any binaries to be
executed must be accessible within the container's rootfs.
The started process will run inside the container's rootfs. Any changes
made by the process to the container's filesystem will persist after the
process finished executing.
The started process will join all the container's existing namespaces. When the
container is paused, the process will also be paused and will resume when
the container is unpaused. The started process will only run when the container's
primary process (PID 1) is running, and will not be restarted when the container
is restarted.
#### Planned additions
The started process will have its own cgroups nested inside the container's
cgroups. This is used for process tracking and optionally resource allocation
handling for the new process. Freezer cgroup is required, the rest of the cgroups
are optional. The process executor must place its pid inside the correct
cgroups before starting the process. This is done so that no child processes or
threads can escape the cgroups.
When the process is stopped, the process executor will try (in a best-effort way)
to stop all its children and remove the sub-cgroups.

View file

@ -2,14 +2,10 @@
package apparmor
// #cgo LDFLAGS: -lapparmor
// #include <sys/apparmor.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"io/ioutil"
"os"
"unsafe"
)
// IsEnabled returns true if apparmor is enabled for the host.
@ -23,16 +19,36 @@ func IsEnabled() bool {
return false
}
func setprocattr(attr, value string) error {
// Under AppArmor you can only change your own attr, so use /proc/self/
// instead of /proc/<tid>/ like libapparmor does
path := fmt.Sprintf("/proc/self/attr/%s", attr)
f, err := os.OpenFile(path, os.O_WRONLY, 0)
if err != nil {
return err
}
defer f.Close()
_, err = fmt.Fprintf(f, "%s", value)
return err
}
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
func changeOnExec(name string) error {
value := "exec " + name
if err := setprocattr("exec", value); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
}
// ApplyProfile will apply the profile with the specified name to the process after
// the next exec.
func ApplyProfile(name string) error {
if name == "" {
return nil
}
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
if _, err := C.aa_change_onexec(cName); err != nil {
return err
}
return nil
return changeOnExec(name)
}

View file

@ -4,13 +4,13 @@ package libcontainer
import (
"fmt"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
var capabilityMap map[string]capability.Cap
@ -30,40 +30,84 @@ func init() {
}
}
func newCapWhitelist(caps []string) (*whitelist, error) {
l := []capability.Cap{}
for _, c := range caps {
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
bounding := []capability.Cap{}
for _, c := range capConfig.Bounding {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
l = append(l, v)
bounding = append(bounding, v)
}
pid, err := capability.NewPid(os.Getpid())
effective := []capability.Cap{}
for _, c := range capConfig.Effective {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
effective = append(effective, v)
}
inheritable := []capability.Cap{}
for _, c := range capConfig.Inheritable {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
inheritable = append(inheritable, v)
}
permitted := []capability.Cap{}
for _, c := range capConfig.Permitted {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
permitted = append(permitted, v)
}
ambient := []capability.Cap{}
for _, c := range capConfig.Ambient {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
ambient = append(ambient, v)
}
pid, err := capability.NewPid(0)
if err != nil {
return nil, err
}
return &whitelist{
keep: l,
pid: pid,
return &containerCapabilities{
bounding: bounding,
effective: effective,
inheritable: inheritable,
permitted: permitted,
ambient: ambient,
pid: pid,
}, nil
}
type whitelist struct {
pid capability.Capabilities
keep []capability.Cap
type containerCapabilities struct {
pid capability.Capabilities
bounding []capability.Cap
effective []capability.Cap
inheritable []capability.Cap
permitted []capability.Cap
ambient []capability.Cap
}
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
func (w *whitelist) dropBoundingSet() error {
w.pid.Clear(capability.BOUNDS)
w.pid.Set(capability.BOUNDS, w.keep...)
return w.pid.Apply(capability.BOUNDS)
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
func (c *containerCapabilities) ApplyBoundingSet() error {
c.pid.Clear(capability.BOUNDS)
c.pid.Set(capability.BOUNDS, c.bounding...)
return c.pid.Apply(capability.BOUNDS)
}
// drop drops all capabilities for the current process except those specified in the whitelist.
func (w *whitelist) drop() error {
w.pid.Clear(allCapabilityTypes)
w.pid.Set(allCapabilityTypes, w.keep...)
return w.pid.Apply(allCapabilityTypes)
// Apply sets all the capabilities for the current process in the config.
func (c *containerCapabilities) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
c.pid.Set(capability.BOUNDS, c.bounding...)
c.pid.Set(capability.PERMITTED, c.permitted...)
c.pid.Set(capability.INHERITABLE, c.inheritable...)
c.pid.Set(capability.EFFECTIVE, c.effective...)
c.pid.Set(capability.AMBIENT, c.ambient...)
return c.pid.Apply(allCapabilityTypes)
}

View file

@ -27,9 +27,9 @@ type Manager interface {
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Cgroups *configs.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
@ -37,7 +37,7 @@ type Manager interface {
// restore the object later.
GetPaths() map[string]string
// Set the cgroup as configured.
// Sets the cgroup as configured.
Set(container *configs.Config) error
}

View file

@ -0,0 +1,18 @@
// +build linux
package cgroups
import (
"testing"
)
func TestParseCgroups(t *testing.T) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
t.Fatal(err)
}
if _, ok := cgroups["cpu"]; !ok {
t.Fail()
}
}

View file

@ -9,10 +9,8 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
@ -34,7 +32,6 @@ var (
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
@ -107,6 +104,8 @@ func (m *Manager) Apply(pid int) (err error) {
if m.Cgroups == nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
var c = m.Cgroups
@ -115,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
return err
}
m.Paths = make(map[string]string)
if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
_, err := d.path(name)
if err != nil {
@ -125,37 +124,44 @@ func (m *Manager) Apply(pid int) (err error) {
}
return err
}
paths[name] = path
m.Paths[name] = path
}
m.Paths = paths
return cgroups.EnterPid(m.Paths, pid)
}
m.mu.Lock()
defer m.mu.Unlock()
paths := make(map[string]string)
for _, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
}
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
if cgroups.IsNotFound(err) {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
continue
}
return err
}
paths[sys.Name()] = p
m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil {
if os.IsPermission(err) && m.Cgroups.Path == "" {
// If we didn't set a cgroup path, then let's defer the error here
// until we know whether we have set limits or not.
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
// it will have the same limits as its parent.
delete(m.Paths, sys.Name())
continue
}
return err
}
}
m.Paths = paths
return nil
}
func (m *Manager) Destroy() error {
if m.Cgroups.Paths != nil {
if m.Cgroups == nil || m.Cgroups.Paths != nil {
return nil
}
m.mu.Lock()
@ -191,19 +197,20 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
for _, sys := range subsystems {
// Generate fake cgroup data.
d, err := getCgroupData(container.Cgroups, -1)
if err != nil {
return err
}
// Get the path, but don't error out if the cgroup wasn't found.
path, err := d.path(sys.Name())
if err != nil && !cgroups.IsNotFound(err) {
return err
}
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
paths := m.GetPaths()
for _, sys := range subsystems {
path := paths[sys.Name()]
if err := sys.Set(path, container.Cgroups); err != nil {
if path == "" {
// cgroup never applied
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
}
return err
}
}
@ -219,14 +226,8 @@ func (m *Manager) Set(container *configs.Config) error {
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func (m *Manager) Freeze(state configs.FreezerState) error {
d, err := getCgroupData(m.Cgroups, 0)
if err != nil {
return err
}
dir, err := d.path("freezer")
if err != nil {
return err
}
paths := m.GetPaths()
dir := paths["freezer"]
prevState := m.Cgroups.Resources.Freezer
m.Cgroups.Resources.Freezer = state
freezer, err := subsystems.Get("freezer")
@ -242,28 +243,13 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
paths := m.GetPaths()
return cgroups.GetPids(paths["devices"])
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func getCgroupPath(c *configs.Cgroup) (string, error) {
d, err := getCgroupData(c, 0)
if err != nil {
return "", err
}
return d.path("devices")
paths := m.GetPaths()
return cgroups.GetAllPids(paths["devices"])
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
@ -294,25 +280,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
@ -320,11 +289,14 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.innerPath) {
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
@ -340,7 +312,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
return "", err
}
return path, nil
@ -353,8 +325,7 @@ func writeFile(dir, file, data string) error {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
//return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
logrus.Debugf("failed to write %v to %v: %v", data, file, err)
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
@ -374,8 +345,8 @@ func removePath(p string, err error) error {
return nil
}
func CheckCpushares(path string, c int64) error {
var cpuShares int64
func CheckCpushares(path string, c uint64) error {
var cpuShares uint64
if c == 0 {
return nil

View file

@ -0,0 +1,272 @@
// +build linux
package fs
import (
"path/filepath"
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestInvalidCgroupPath(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Path: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
func TestInvalidAbsoluteCgroupPath(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Path: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "../../../../../../../../../../some/path",
Name: "name",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "/../../../../../../../../../../some/path",
Name: "name",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupName(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "parent",
Name: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupName(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "parent",
Name: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupNameAndParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "../../../../../../../../../../some/path",
Name: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "/../../../../../../../../../../some/path",
Name: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}

View file

@ -0,0 +1,636 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
sectorsRecursiveContents = `8:0 1024`
serviceBytesRecursiveContents = `8:0 Read 100
8:0 Write 200
8:0 Sync 300
8:0 Async 500
8:0 Total 500
Total 500`
servicedRecursiveContents = `8:0 Read 10
8:0 Write 40
8:0 Sync 20
8:0 Async 30
8:0 Total 50
Total 50`
queuedRecursiveContents = `8:0 Read 1
8:0 Write 4
8:0 Sync 2
8:0 Async 3
8:0 Total 5
Total 5`
serviceTimeRecursiveContents = `8:0 Read 173959
8:0 Write 0
8:0 Sync 0
8:0 Async 173959
8:0 Total 17395
Total 17395`
waitTimeRecursiveContents = `8:0 Read 15571
8:0 Write 0
8:0 Sync 0
8:0 Async 15571
8:0 Total 15571`
mergedRecursiveContents = `8:0 Read 5
8:0 Write 10
8:0 Sync 0
8:0 Async 0
8:0 Total 15
Total 15`
timeRecursiveContents = `8:0 8`
throttleServiceBytes = `8:0 Read 11030528
8:0 Write 23
8:0 Sync 42
8:0 Async 11030528
8:0 Total 11030528
252:0 Read 11030528
252:0 Write 23
252:0 Sync 42
252:0 Async 11030528
252:0 Total 11030528
Total 22061056`
throttleServiced = `8:0 Read 164
8:0 Write 23
8:0 Sync 42
8:0 Async 164
8:0 Total 164
252:0 Read 164
252:0 Write 23
252:0 Sync 42
252:0 Async 164
252:0 Total 164
Total 328`
)
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
}
func TestBlkioSetWeight(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightBefore = 100
weightAfter = 200
)
helper.writeFileContents(map[string]string{
"blkio.weight": strconv.Itoa(weightBefore),
})
helper.CgroupData.config.Resources.BlkioWeight = weightAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight")
if err != nil {
t.Fatalf("Failed to parse blkio.weight - %s", err)
}
if value != weightAfter {
t.Fatal("Got the wrong value, set blkio.weight failed.")
}
}
func TestBlkioSetWeightDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightDeviceBefore = "8:0 400"
)
wd := configs.NewWeightDevice(8, 0, 500, 0)
weightDeviceAfter := wd.WeightString()
helper.writeFileContents(map[string]string{
"blkio.weight_device": weightDeviceBefore,
})
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
if err != nil {
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
}
if value != weightDeviceAfter {
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
}
}
// regression #274
func TestBlkioSetMultipleWeightDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightDeviceBefore = "8:0 400"
)
wd1 := configs.NewWeightDevice(8, 0, 500, 0)
wd2 := configs.NewWeightDevice(8, 16, 500, 0)
// we cannot actually set and check both because normal ioutil.WriteFile
// when writing to cgroup file will overwrite the whole file content instead
// of updating it as the kernel is doing. Just check the second device
// is present will suffice for the test to ensure multiple writes are done.
weightDeviceAfter := wd2.WeightString()
helper.writeFileContents(map[string]string{
"blkio.weight_device": weightDeviceBefore,
})
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
if err != nil {
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
}
if value != weightDeviceAfter {
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
}
}
func TestBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioStatsNoSectorsFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServicedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoQueuedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoMergedFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read Write",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestNonCFQBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "",
"blkio.io_serviced_recursive": "",
"blkio.io_queued_recursive": "",
"blkio.sectors_recursive": "",
"blkio.io_service_time_recursive": "",
"blkio.io_wait_time_recursive": "",
"blkio.io_merged_recursive": "",
"blkio.time_recursive": "",
"blkio.throttle.io_service_bytes": throttleServiceBytes,
"blkio.throttle.io_serviced": throttleServiced,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_bps_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
}
}
func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_bps_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
}
}
func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_iops_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
}
}
func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_iops_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
}
}

View file

@ -22,21 +22,59 @@ func (s *CpuGroup) Name() string {
func (s *CpuGroup) Apply(d *cgroupData) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
_, err := d.join("cpu")
path, err := d.path("cpu")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return s.ApplyDir(path, d.config, d.pid)
}
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
// This might happen if we have no cpu cgroup mounted.
// Just do nothing and don't fail.
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// We should set the real-Time group scheduling settings before moving
// in the process because if the process is already in SCHED_RR mode
// and no RT bandwidth is set, adding it will fail.
if err := s.SetRtSched(path, cgroup); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
return err
}
return nil
}
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuPeriod != 0 {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
return err
}
}
@ -45,15 +83,8 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
if err := s.SetRtSched(path, cgroup); err != nil {
return err
}
return nil

View file

@ -0,0 +1,209 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
func TestCpuSetShares(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
sharesBefore = 1024
sharesAfter = 512
)
helper.writeFileContents(map[string]string{
"cpu.shares": strconv.Itoa(sharesBefore),
})
helper.CgroupData.config.Resources.CpuShares = sharesAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares")
if err != nil {
t.Fatalf("Failed to parse cpu.shares - %s", err)
}
if value != sharesAfter {
t.Fatal("Got the wrong value, set cpu.shares failed.")
}
}
func TestCpuSetBandWidth(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
quotaBefore = 8000
quotaAfter = 5000
periodBefore = 10000
periodAfter = 7000
rtRuntimeBefore = 8000
rtRuntimeAfter = 5000
rtPeriodBefore = 10000
rtPeriodAfter = 7000
)
helper.writeFileContents(map[string]string{
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
}
if quota != quotaAfter {
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
}
period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
}
if period != periodAfter {
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
}
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
}
if rtRuntime != rtRuntimeAfter {
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
}
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
}
if rtPeriod != rtPeriodAfter {
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
}
}
func TestCpuStats(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
nrPeriods = 2000
nrThrottled = 200
throttledTime = uint64(18446744073709551615)
)
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
nrPeriods, nrThrottled, throttledTime)
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.ThrottlingData{
Periods: nrPeriods,
ThrottledPeriods: nrThrottled,
ThrottledTime: throttledTime}
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
}
func TestNoCpuStatFile(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal("Expected not to fail, but did")
}
}
func TestInvalidCpuStat(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpuStatContent := `nr_periods 2000
nr_throttled 200
throttled_time fortytwo`
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failed stat parsing.")
}
}
func TestCpuSetRtSchedAtApply(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
rtRuntimeBefore = 0
rtRuntimeAfter = 5000
rtPeriodBefore = 0
rtPeriodAfter = 7000
)
helper.writeFileContents(map[string]string{
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
cpu := &CpuGroup{}
if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
t.Fatal(err)
}
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
}
if rtRuntime != rtRuntimeAfter {
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
}
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
}
if rtPeriod != rtPeriodAfter {
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
}
pid, err := getCgroupParamUint(helper.CgroupPath, "cgroup.procs")
if err != nil {
t.Fatalf("Failed to parse cgroup.procs - %s", err)
}
if pid != 1234 {
t.Fatal("Got the wrong value, set cgroup.procs failed.")
}
}

View file

@ -8,7 +8,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@ -58,16 +57,34 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
if dir == "" {
return nil
}
root, err := getCgroupRoot()
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
if err != nil {
return err
}
if err := s.ensureParent(dir, root); err != nil {
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
// 'ensureParent' start with parent because we don't want to
// explicitly inherit from parent, it could conflict with
// 'cpuset.cpu_exclusive'.
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
return err
}
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
// We didn't inherit cpuset configs from parent, but we have
// to ensure cpuset configs are set before moving task into the
// cgroup.
// The logic is, if user specified cpuset configs, use these
// specified configs, otherwise, inherit from parent. This makes
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
// keep backward compatbility.
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
return err
}
@ -137,3 +154,10 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
func (s *CpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
if err := s.Set(path, cgroup); err != nil {
return err
}
return s.copyIfNeeded(path, filepath.Dir(path))
}

View file

@ -0,0 +1,65 @@
// +build linux
package fs
import (
"testing"
)
func TestCpusetSetCpus(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
cpusBefore = "0"
cpusAfter = "1-3"
)
helper.writeFileContents(map[string]string{
"cpuset.cpus": cpusBefore,
})
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus")
if err != nil {
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
}
if value != cpusAfter {
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
}
}
func TestCpusetSetMems(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
memsBefore = "0"
memsAfter = "1"
)
helper.writeFileContents(map[string]string{
"cpuset.mems": memsBefore,
})
helper.CgroupData.config.Resources.CpusetMems = memsAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems")
if err != nil {
t.Fatalf("Failed to parse cpuset.mems - %s", err)
}
if value != memsAfter {
t.Fatal("Got the wrong value, set cpuset.mems failed.")
}
}

View file

@ -43,21 +43,23 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
}
return nil
}
if !cgroup.Resources.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
if cgroup.Resources.AllowAllDevices != nil {
if *cgroup.Resources.AllowAllDevices == false {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
}
}
for _, dev := range cgroup.Resources.DeniedDevices {

View file

@ -0,0 +1,98 @@
// +build linux
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
allowedDevices = []*configs.Device{
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
}
allowedList = "c 1:5 rwm"
deniedDevices = []*configs.Device{
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
}
deniedList = "c 1:3 rwm"
)
func TestDevicesSetAllow(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.deny": "a",
})
allowAllDevices := false
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("Got the wrong value, set devices.allow failed.")
}
// When AllowAllDevices is nil, devices.allow file should not be modified.
helper.CgroupData.config.Resources.AllowAllDevices = nil
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err = getCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
}
}
func TestDevicesSetDeny(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.allow": "a",
})
allowAllDevices := true
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.deny")
if err != nil {
t.Fatalf("Failed to parse devices.deny - %s", err)
}
if value != deniedList {
t.Fatal("Got the wrong value, set devices.deny failed.")
}
}

View file

@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Resources.Freezer {
case configs.Frozen, configs.Thawed:
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
return err
}
for {
// In case this loop does not exit because it doesn't get the expected
// state, let's write again this state, hoping it's going to be properly
// set this time. Otherwise, this loop could run infinitely, waiting for
// a state change that would never happen.
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
return err
}
state, err := readFile(path, "freezer.state")
if err != nil {
return err
@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
break
}
time.Sleep(1 * time.Millisecond)
}
case configs.Undefined:

View file

@ -0,0 +1,47 @@
// +build linux
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestFreezerSetState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"freezer.state": string(configs.Frozen),
})
helper.CgroupData.config.Resources.Freezer = configs.Thawed
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "freezer.state")
if err != nil {
t.Fatalf("Failed to parse freezer.state - %s", err)
}
if value != string(configs.Thawed) {
t.Fatal("Got the wrong value, set freezer.state failed.")
}
}
func TestFreezerSetInvalidState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
const (
invalidArg configs.FreezerState = "Invalid"
)
helper.CgroupData.config.Resources.Freezer = invalidArg
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
t.Fatal("Failed to return invalid argument error")
}
}

View file

@ -0,0 +1,154 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
hugetlbUsageContents = "128\n"
hugetlbMaxUsageContents = "256\n"
hugetlbFailcnt = "100\n"
)
var (
usage = "hugetlb.%s.usage_in_bytes"
limit = "hugetlb.%s.limit_in_bytes"
maxUsage = "hugetlb.%s.max_usage_in_bytes"
failcnt = "hugetlb.%s.failcnt"
)
func TestHugetlbSetHugetlb(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
const (
hugetlbBefore = 256
hugetlbAfter = 512
)
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
})
}
for _, pageSize := range HugePageSizes {
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
{
Pagesize: pageSize,
Limit: hugetlbAfter,
},
}
hugetlb := &HugetlbGroup{}
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
}
for _, pageSize := range HugePageSizes {
limit := fmt.Sprintf(limit, pageSize)
value, err := getCgroupParamUint(helper.CgroupPath, limit)
if err != nil {
t.Fatalf("Failed to parse %s - %s", limit, err)
}
if value != hugetlbAfter {
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
}
}
}
func TestHugetlbStats(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
for _, pageSize := range HugePageSizes {
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
}
}
func TestHugetlbStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
maxUsage: hugetlbMaxUsageContents,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): "bad",
maxUsage: hugetlbMaxUsageContents,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: hugetlbUsageContents,
maxUsage: "bad",
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}

View file

@ -5,13 +5,23 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall" // only for Errno
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)
type MemoryGroup struct {
@ -25,20 +35,23 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
path, err := d.path("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
} else if path == "" {
return nil
}
if memoryAssigned(d.config) {
if path != "" {
if _, err := os.Stat(path); os.IsNotExist(err) {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
return err
// Only enable kernel memory accouting when this cgroup
// is created by libcontainer, otherwise we might get
// error when people use `cgroupsPath` to join an existed
// cgroup whose kernel memory is not initialized.
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
}
}
defer func() {
if err != nil {
os.RemoveAll(path)
@ -54,21 +67,57 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}
func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
if cgroup.Resources.Memory == -1 {
// Only set swap if it's enabled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
}
}
// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
@ -77,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap != 0 {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
@ -113,11 +162,18 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
return err
}
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err
@ -130,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
}
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
} else if *cgroup.Resources.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err
}
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
}
return nil
@ -187,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
}
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
value, err := getCgroupParamUint(path, useHierarchy)
if err != nil {
return err
}
if value == 1 {
stats.MemoryStats.UseHierarchy = true
}
return nil
}
@ -197,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.KernelMemoryTCP > 0 ||
cgroup.Resources.OomKillDisable ||
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {

View file

@ -0,0 +1,455 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
memoryStatContents = `cache 512
rss 1024`
memoryUsageContents = "2048\n"
memoryMaxUsageContents = "4096\n"
memoryFailcnt = "100\n"
memoryLimitContents = "8192\n"
memoryUseHierarchyContents = "1\n"
)
func TestMemorySetMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 314572800 // 300M
memoryAfter = 524288000 // 500M
reservationBefore = 209715200 // 200M
reservationAfter = 314572800 // 300M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
}
if value != reservationAfter {
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
}
}
func TestMemorySetMemoryswap(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryswapBefore = 314572800 // 300M
memoryswapAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
})
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 314572800 // 300M
memoryswapBefore = 524288000 // 500M
memoryAfter = 629145600 // 600M
memoryswapAfter = 838860800 // 800M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
// Set will call getMemoryData when memory and swap memory are
// both set, fake these fields so we don't get error.
"memory.usage_in_bytes": "0",
"memory.max_usage_in_bytes": "0",
"memory.failcnt": "0",
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 629145600 // 600M
memoryswapBefore = 838860800 // 800M
memoryAfter = 314572800 // 300M
memoryswapAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
// Set will call getMemoryData when memory and swap memory are
// both set, fake these fields so we don't get error.
"memory.usage_in_bytes": "0",
"memory.max_usage_in_bytes": "0",
"memory.failcnt": "0",
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetKernelMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
kernelMemoryBefore = 314572800 // 300M
kernelMemoryAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
})
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
}
if value != kernelMemoryAfter {
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
}
}
func TestMemorySetKernelMemoryTCP(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
kernelMemoryTCPBefore = 314572800 // 300M
kernelMemoryTCPAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
})
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
}
if value != kernelMemoryTCPAfter {
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
}
}
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
swappinessBefore := 60 //default is 60
swappinessAfter := uint64(0)
helper.writeFileContents(map[string]string{
"memory.swappiness": strconv.Itoa(swappinessBefore),
})
helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness")
if err != nil {
t.Fatalf("Failed to parse memory.swappiness - %s", err)
}
if value != swappinessAfter {
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
}
}
func TestMemoryStats(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.failcnt": memoryFailcnt,
"memory.memsw.usage_in_bytes": memoryUsageContents,
"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
"memory.memsw.failcnt": memoryFailcnt,
"memory.memsw.limit_in_bytes": memoryLimitContents,
"memory.kmem.usage_in_bytes": memoryUsageContents,
"memory.kmem.max_usage_in_bytes": memoryMaxUsageContents,
"memory.kmem.failcnt": memoryFailcnt,
"memory.kmem.limit_in_bytes": memoryLimitContents,
"memory.use_hierarchy": memoryUseHierarchyContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
}
func TestMemoryStatsNoStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
}
func TestMemoryStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": "rss rss",
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": "bad",
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": "bad",
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": "bad",
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemorySetOomControl(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
oomKillDisable = 1 // disable oom killer, default is 0
)
helper.writeFileContents(map[string]string{
"memory.oom_control": strconv.Itoa(oomKillDisable),
})
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
if err != nil {
t.Fatalf("Failed to parse memory.oom_control - %s", err)
}
if value != oomKillDisable {
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
}
}

View file

@ -3,6 +3,8 @@
package fs
import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -23,8 +25,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
if cgroup.Resources.NetClsClassid != 0 {
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
return err
}
}

View file

@ -0,0 +1,39 @@
// +build linux
package fs
import (
"strconv"
"testing"
)
const (
classidBefore = 0x100002
classidAfter = 0x100001
)
func TestNetClsSetClassid(t *testing.T) {
helper := NewCgroupTestUtil("net_cls", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
})
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
netcls := &NetClsGroup{}
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
// As we are in mock environment, we can't get correct value of classid from
// net_cls.classid.
// So. we just judge if we successfully write classid into file
value, err := getCgroupParamUint(helper.CgroupPath, "net_cls.classid")
if err != nil {
t.Fatalf("Failed to parse net_cls.classid - %s", err)
}
if value != classidAfter {
t.Fatal("Got the wrong value, set net_cls.classid failed.")
}
}

View file

@ -0,0 +1,38 @@
// +build linux
package fs
import (
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
prioMap = []*configs.IfPrioMap{
{
Interface: "test",
Priority: 5,
},
}
)
func TestNetPrioSetIfPrio(t *testing.T) {
helper := NewCgroupTestUtil("net_prio", t)
defer helper.cleanup()
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
netPrio := &NetPrioGroup{}
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
if err != nil {
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
}
if !strings.Contains(value, "test 5") {
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
}
}

View file

@ -0,0 +1,111 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
maxUnlimited = -1
maxLimited = 1024
)
func TestPidsSetMax(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.max": "max",
})
helper.CgroupData.config.Resources.PidsLimit = maxLimited
pids := &PidsGroup{}
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "pids.max")
if err != nil {
t.Fatalf("Failed to parse pids.max - %s", err)
}
if value != maxLimited {
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
}
}
func TestPidsSetUnlimited(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.max": strconv.Itoa(maxLimited),
})
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
pids := &PidsGroup{}
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "pids.max")
if err != nil {
t.Fatalf("Failed to parse pids.max - %s", err)
}
if value != "max" {
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
}
}
func TestPidsStats(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.current": strconv.Itoa(1337),
"pids.max": strconv.Itoa(maxLimited),
})
pids := &PidsGroup{}
stats := *cgroups.NewStats()
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
t.Fatal(err)
}
if stats.PidsStats.Current != 1337 {
t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
}
if stats.PidsStats.Limit != maxLimited {
t.Fatalf("Expected %d, got %d for pids.max", maxLimited, stats.PidsStats.Limit)
}
}
func TestPidsStatsUnlimited(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.current": strconv.Itoa(4096),
"pids.max": "max",
})
pids := &PidsGroup{}
stats := *cgroups.NewStats()
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
t.Fatal(err)
}
if stats.PidsStats.Current != 4096 {
t.Fatalf("Expected %d, got %d for pids.current", 4096, stats.PidsStats.Current)
}
if stats.PidsStats.Limit != 0 {
t.Fatalf("Expected %d, got %d for pids.max", 0, stats.PidsStats.Limit)
}
}

View file

@ -0,0 +1,123 @@
// +build linux
package fs
import (
"fmt"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/sirupsen/logrus"
)
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
if len(expected) != len(actual) {
return fmt.Errorf("blkioStatEntries length do not match")
}
for i, expValue := range expected {
actValue := actual[i]
if expValue != actValue {
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
}
}
return nil
}
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
t.Fail()
}
}
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
if expected != actual {
logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
if expected != actual {
logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
expectMemoryDataEquals(t, expected.Usage, actual.Usage)
expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
if expected.UseHierarchy != actual.UseHierarchy {
logrus.Printf("Expected memory use hiearchy %v, but found %v\n", expected.UseHierarchy, actual.UseHierarchy)
t.Fail()
}
for key, expValue := range expected.Stats {
actValue, ok := actual.Stats[key]
if !ok {
logrus.Printf("Expected memory stat key %s not found\n", key)
t.Fail()
}
if expValue != actValue {
logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
t.Fail()
}
}
}
func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
if expected.Usage != actual.Usage {
logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
t.Fail()
}
if expected.MaxUsage != actual.MaxUsage {
logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
t.Fail()
}
if expected.Failcnt != actual.Failcnt {
logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
t.Fail()
}
if expected.Limit != actual.Limit {
logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit)
t.Fail()
}
}

View file

@ -0,0 +1,67 @@
// +build linux
/*
Utility for testing cgroup operations.
Creates a mock of the cgroup filesystem for the duration of the test.
*/
package fs
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
type cgroupTestUtil struct {
// cgroup data to use in tests.
CgroupData *cgroupData
// Path to the mock cgroup directory.
CgroupPath string
// Temporary directory to store mock cgroup filesystem.
tempDir string
t *testing.T
}
// Creates a new test util for the specified subsystem
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
d := &cgroupData{
config: &configs.Cgroup{},
}
d.config.Resources = &configs.Resources{}
tempDir, err := ioutil.TempDir("", "cgroup_test")
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testCgroupPath := filepath.Join(d.root, subsystem)
if err != nil {
t.Fatal(err)
}
// Ensure the full mock cgroup path exists.
err = os.MkdirAll(testCgroupPath, 0755)
if err != nil {
t.Fatal(err)
}
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
}
func (c *cgroupTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents on the mock of the specified cgroup files.
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.CgroupPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View file

@ -12,7 +12,6 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View file

@ -0,0 +1,97 @@
// +build linux
package fs
import (
"io/ioutil"
"math"
"os"
"path/filepath"
"strconv"
"testing"
)
const (
cgroupFile = "cgroup.file"
floatValue = 2048.0
floatString = "2048"
)
func TestGetCgroupParamsInt(t *testing.T) {
// Setup tempdir.
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, cgroupFile)
// Success.
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
if err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with new line.
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with negative values
err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Success with negative values lesser than min int64
s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
err = ioutil.WriteFile(tempFile, []byte(s), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Not a float.
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
// Unknown file.
err = os.Remove(tempFile)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
}

View file

@ -51,6 +51,8 @@ type MemoryStats struct {
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
}

View file

@ -1,4 +1,4 @@
// +build !linux
// +build !linux static_build
package systemd
@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
return nil, fmt.Errorf("Systemd not supported")
return fmt.Errorf("Systemd not supported")
}
func (m *Manager) Freeze(state configs.FreezerState) error {

View file

@ -1,14 +1,12 @@
// +build linux
// +build linux,!static_build
package systemd
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
@ -19,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)
type Manager struct {
@ -67,13 +66,16 @@ var subsystems = subsystemSet{
const (
testScopeWait = 4
testSliceWait = 4
)
var (
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasTransientDefaultDependencies bool
hasDelegate bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@ -146,6 +148,48 @@ func UseSystemd() bool {
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegate = true
dl := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegate = false
}
}
}
// Assume we have the ability to start a transient unit as a slice
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
hasStartTransientSliceUnit = true
// To ensure simple clean-up, we create a slice off the root with no hierarchy
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
if _, ok := err.(dbus.Error); ok {
hasStartTransientSliceUnit = false
}
}
for i := 0; i <= testSliceWait; i++ {
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
hasStartTransientSliceUnit = false
break
}
}
} else {
break
}
time.Sleep(time.Millisecond)
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
theConn.StopUnit(slice, "replace", nil)
}
return hasStartTransientUnit
}
@ -179,13 +223,29 @@ func (m *Manager) Apply(pid int) error {
slice = c.Parent
}
properties = append(properties,
systemdDbus.PropSlice(slice),
systemdDbus.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
if !hasStartTransientSliceUnit {
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
}
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
properties = append(properties, systemdDbus.PropSlice(slice))
}
// only add pid if its valid, -1 is used w/ general slice creation.
if pid != -1 {
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
}
if hasDelegate {
// This is only supported on systemd versions 218 and above.
newProp("Delegate", true),
)
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
@ -206,7 +266,21 @@ func (m *Manager) Apply(pid int) error {
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", uint64(c.Resources.CpuShares)))
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
@ -214,20 +288,25 @@ func (m *Manager) Apply(pid int) error {
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
// We need to set kernel memory before processes join cgroup because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
// And swap memory limit needs to be set after memory limit, only
// memory limit is handled by systemd, so it's kind of ugly here.
if c.Resources.KernelMemory > 0 {
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
if err := setKernelMemory(c); err != nil {
return err
}
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil {
statusChan := make(chan string)
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
return err
}
select {
case <-statusChan:
case <-time.After(time.Second):
logrus.Warnf("Timed out while waiting for StartTransientUnit completion signal from dbus. Continuing...")
}
if err := joinCgroups(c, pid); err != nil {
return err
}
@ -269,15 +348,6 @@ func (m *Manager) GetPaths() map[string]string {
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
path, err := getSubsystemPath(c, subsystem)
if err != nil {
@ -286,10 +356,9 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
return "", err
}
return path, nil
}
@ -299,7 +368,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
switch name {
case "name=systemd":
// let systemd handle this
break
case "cpuset":
path, err := getSubsystemPath(c, name)
if err != nil && !cgroups.IsNotFound(err) {
@ -309,7 +377,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
if err := s.ApplyDir(path, c, pid); err != nil {
return err
}
break
default:
_, err := join(c, name, pid)
if err != nil {
@ -331,10 +398,10 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
return nil
}
// systemd represents slice heirarchy using `-`, so we need to follow suit when
// systemd represents slice hierarchy using `-`, so we need to follow suit when
// generating the path of slice. Essentially, test-a-b.slice becomes
// test.slice/test-a.slice/test-a-b.slice.
func expandSlice(slice string) (string, error) {
// /test.slice/test-a.slice/test-a-b.slice.
func ExpandSlice(slice string) (string, error) {
suffix := ".slice"
// Name has to end with ".slice", but can't be just ".slice".
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
@ -348,6 +415,10 @@ func expandSlice(slice string) (string, error) {
var path, prefix string
sliceName := strings.TrimSuffix(slice, suffix)
// if input was -.slice, we should just return root now
if sliceName == "-" {
return "/", nil
}
for _, component := range strings.Split(sliceName, "-") {
// test--a.slice isn't permitted, nor is -test.slice.
if component == "" {
@ -355,10 +426,9 @@ func expandSlice(slice string) (string, error) {
}
// Append the component to the path and to the prefix.
path += prefix + component + suffix + "/"
path += "/" + prefix + component + suffix
prefix += component + "-"
}
return path, nil
}
@ -368,17 +438,19 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
slice := "system.slice"
if c.Parent != "" {
slice = c.Parent
}
slice, err = expandSlice(slice)
slice, err = ExpandSlice(slice)
if err != nil {
return "", err
}
@ -439,6 +511,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range subsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())
@ -460,7 +537,11 @@ func (m *Manager) Set(container *configs.Config) error {
}
func getUnitName(c *configs.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
// by default, we create a scope unless the user explicitly asks for a slice.
if !strings.HasSuffix(c.Name, ".slice") {
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
}
return c.Name
}
func setKernelMemory(c *configs.Cgroup) error {
@ -472,8 +553,15 @@ func setKernelMemory(c *configs.Cgroup) error {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// This doesn't get called by manager.Set, so we need to do it here.
s := &fs.MemoryGroup{}
return s.SetKernelMemory(path, c)
return fs.EnableKernelMemoryAccounting(path)
}
// isUnitExists returns true if the error is that a systemd unit already exists.
func isUnitExists(err error) bool {
if err != nil {
if dbusError, ok := err.(dbus.Error); ok {
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
}
}
return false
}

View file

@ -16,37 +16,24 @@ import (
"github.com/docker/go-units"
)
const cgroupNamePrefix = "name="
const (
cgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@ -70,6 +57,30 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func GetClosestMountpointAncestor(dir, mountinfo string) string {
deepestMountPoint := ""
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
mountInfoParts := strings.Fields(mountInfoEntry)
if len(mountInfoParts) < 5 {
continue
}
mountPoint := mountInfoParts[4]
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
deepestMountPoint = mountPoint
}
}
return deepestMountPoint
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@ -113,7 +124,7 @@ type Mount struct {
Subsystems []string
}
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
@ -121,16 +132,17 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
for scanner.Scan() {
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+9] != "cgroup" {
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
@ -139,12 +151,17 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if ss[opt] {
} else {
m.Subsystems = append(m.Subsystems, opt)
}
if !all {
numFound++
}
}
res = append(res, m)
}
@ -154,23 +171,25 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
return res, nil
}
func GetCgroupMounts() ([]Mount, error) {
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
func GetCgroupMounts(all bool) ([]Mount, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
all, err := GetAllSubsystems()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
for s := range allSubsystems {
allMap[s] = true
}
return getCgroupMountsHelper(allMap, f)
return getCgroupMountsHelper(allMap, f, all)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
@ -185,9 +204,6 @@ func GetAllSubsystems() ([]string, error) {
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
@ -196,11 +212,14 @@ func GetAllSubsystems() ([]string, error) {
}
}
}
if err := s.Err(); err != nil {
return nil, err
}
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
@ -209,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
@ -219,8 +246,33 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
return nil, err
}
@ -243,6 +295,8 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -250,21 +304,35 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
s := bufio.NewScanner(f)
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
cgroups := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
}
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
}
}
if err := s.Err(); err != nil {
return nil, err
}
return cgroups, nil
}
@ -291,8 +359,7 @@ func PathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
if err := WriteCgroupProc(path, pid); err != nil {
return err
}
}
@ -361,7 +428,7 @@ func GetAllPids(path string) ([]int, error) {
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != "cgroup.procs" {
if file != CgroupProcesses {
return nil
}
if iErr != nil {
@ -376,3 +443,20 @@ func GetAllPids(path string) ([]int, error) {
})
return pids, err
}
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
func WriteCgroupProc(dir string, pid int) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}
// Dont attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
}
return nil
}

View file

@ -0,0 +1,333 @@
// +build linux
package cgroups
import (
"bytes"
"fmt"
"reflect"
"strings"
"testing"
)
const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755
18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw
20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755
23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755
24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw
26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children
27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children
28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children
29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children
30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children
31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children
32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children
33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children
34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children
35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered
36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel
38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel
39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel
40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw
42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw
43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw
45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered
46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered
47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered
48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered
121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000
124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw
165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered
171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered
175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered
179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered
183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered
187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered
191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered
195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered
199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered
203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered
207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered
211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered
215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered
219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered
223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered
227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered
231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered
235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered
239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered
243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered
247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered
31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1`
const systemdMountinfo = `115 83 0:32 / / rw,relatime - aufs none rw,si=c0bd3d3,dio,dirperm1
116 115 0:35 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
117 115 0:36 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
118 117 0:37 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
119 115 0:38 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
120 119 0:39 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
121 120 0:19 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
122 120 0:20 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
123 120 0:21 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
124 120 0:22 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
125 120 0:23 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
126 120 0:24 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
127 120 0:25 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
128 120 0:26 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
129 120 0:27 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event,release_agent=/run/cgmanager/agents/cgm-release-agent.perf_event
130 115 43:0 /var/lib/docker/volumes/a44a712176377f57c094397330ee04387284c478364eb25f4c3d25f775f25c26/_data /var/lib/docker rw,relatime - ext4 /dev/nbd0 rw,data=ordered
131 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/nbd0 rw,data=ordered
132 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hostname /etc/hostname rw,relatime - ext4 /dev/nbd0 rw,data=ordered
133 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hosts /etc/hosts rw,relatime - ext4 /dev/nbd0 rw,data=ordered
134 117 0:33 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
135 117 0:13 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
136 117 0:12 /1 /dev/console rw,nosuid,noexec,relatime - devpts none rw,gid=5,mode=620,ptmxmode=000
84 115 0:40 / /tmp rw,relatime - tmpfs none rw`
const cgroup2Mountinfo = `18 64 0:18 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
19 64 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
20 64 0:6 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8171204k,nr_inodes=2042801,mode=755
21 18 0:19 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
22 20 0:20 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
23 20 0:21 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
24 64 0:22 / /run rw,nosuid,nodev shared:24 - tmpfs tmpfs rw,seclabel,mode=755
25 18 0:23 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755
26 25 0:24 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - cgroup2 cgroup rw
27 18 0:25 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw,seclabel
28 18 0:26 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime shared:21 - efivarfs efivarfs rw
29 25 0:27 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,cpu,cpuacct
30 25 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,memory
31 25 0:29 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,net_cls,net_prio
32 25 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,blkio
33 25 0:31 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,perf_event
34 25 0:32 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,hugetlb
35 25 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,freezer
36 25 0:34 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,cpuset
37 25 0:35 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices
38 25 0:36 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,pids
61 18 0:37 / /sys/kernel/config rw,relatime shared:22 - configfs configfs rw
64 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/fedora_dhcp--16--129-root rw,seclabel,data=ordered
39 18 0:17 / /sys/fs/selinux rw,relatime shared:23 - selinuxfs selinuxfs rw
40 20 0:16 / /dev/mqueue rw,relatime shared:25 - mqueue mqueue rw,seclabel
41 20 0:39 / /dev/hugepages rw,relatime shared:26 - hugetlbfs hugetlbfs rw,seclabel
`
func TestGetCgroupMounts(t *testing.T) {
type testData struct {
mountInfo string
root string
subsystems map[string]bool
}
testTable := []testData{
{
mountInfo: fedoraMountinfo,
root: "/",
subsystems: map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"hugetlb": true,
},
},
{
mountInfo: systemdMountinfo,
root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope",
subsystems: map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
},
},
}
for _, td := range testTable {
mi := bytes.NewBufferString(td.mountInfo)
cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false)
if err != nil {
t.Fatal(err)
}
cgMap := make(map[string]Mount)
for _, m := range cgMounts {
for _, ss := range m.Subsystems {
cgMap[ss] = m
}
}
for ss := range td.subsystems {
m, ok := cgMap[ss]
if !ok {
t.Fatalf("%s not found", ss)
}
if m.Root != td.root {
t.Fatalf("unexpected root for %s: %s", ss, m.Root)
}
if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) {
t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint)
}
var ssFound bool
for _, mss := range m.Subsystems {
if mss == ss {
ssFound = true
break
}
}
if !ssFound {
t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems)
}
}
}
}
func BenchmarkGetCgroupMounts(b *testing.B) {
subsystems := map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"hugetlb": true,
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
mi := bytes.NewBufferString(fedoraMountinfo)
b.StartTimer()
if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil {
b.Fatal(err)
}
}
}
func TestParseCgroupString(t *testing.T) {
testCases := []struct {
input string
expectedError error
expectedOutput map[string]string
}{
{
// Taken from a CoreOS instance running systemd 225 with CPU/Mem
// accounting enabled in systemd
input: `9:blkio:/
8:freezer:/
7:perf_event:/
6:devices:/system.slice/system-sshd.slice
5:cpuset:/
4:cpu,cpuacct:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
3:net_cls,net_prio:/
2:memory:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
1:name=systemd:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service`,
expectedOutput: map[string]string{
"name=systemd": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"blkio": "/",
"freezer": "/",
"perf_event": "/",
"devices": "/system.slice/system-sshd.slice",
"cpuset": "/",
"cpu": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"cpuacct": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"net_cls": "/",
"net_prio": "/",
"memory": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
},
},
{
input: `malformed input`,
expectedError: fmt.Errorf(`invalid cgroup entry: must contain at least two colons: malformed input`),
},
}
for ndx, testCase := range testCases {
out, err := parseCgroupFromReader(strings.NewReader(testCase.input))
if err != nil {
if testCase.expectedError == nil || testCase.expectedError.Error() != err.Error() {
t.Errorf("%v: expected error %v, got error %v", ndx, testCase.expectedError, err)
}
} else {
if !reflect.DeepEqual(testCase.expectedOutput, out) {
t.Errorf("%v: expected output %v, got error %v", ndx, testCase.expectedOutput, out)
}
}
}
}
func TestIgnoreCgroup2Mount(t *testing.T) {
subsystems := map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"pids": true,
"name=systemd": true,
}
mi := bytes.NewBufferString(cgroup2Mountinfo)
cgMounts, err := getCgroupMountsHelper(subsystems, mi, false)
if err != nil {
t.Fatal(err)
}
for _, m := range cgMounts {
if m.Mountpoint == "/sys/fs/cgroup/systemd" {
t.Errorf("parsed a cgroup2 mount at /sys/fs/cgroup/systemd instead of ignoring it")
}
}
}
const fakeMountInfo = ` 18 24 0:17 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
100 99 1:31 / /foo/bar rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz2 rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/bazza rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz3 rw,relatime - fake fake rw,fake
100 99 1:31 / /foo rw,relatime - fake fake rw,fake
100 99 1:31 / /unrelated rw,relatime - fake fake rw,fake
100 99 1:31 / / rw,relatime - fake fake rw,fake
`
func TestGetClosestMountpointAncestor(t *testing.T) {
testCases := []struct {
input string
mountinfos string
output string
}{
{input: "/foo/bar/baz/a/b/c", mountinfos: fakeMountInfo, output: "/foo/bar/baz"},
{input: "/foo/bar/baz", mountinfos: fakeMountInfo, output: "/foo/bar/baz"},
{input: "/foo/bar/bazza", mountinfos: fakeMountInfo, output: "/foo/bar/bazza"},
{input: "/a/b/c/d", mountinfos: fakeMountInfo, output: "/"},
}
for _, c := range testCases {
mountpoint := GetClosestMountpointAncestor(c.input, c.mountinfos)
if mountpoint != c.output {
t.Errorf("expected %s, got %s", c.output, mountpoint)
}
}
}

View file

@ -1,11 +0,0 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = false
}

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
type FreezerState string
@ -22,7 +20,7 @@ type Cgroup struct {
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
// ScopePrefix decribes prefix for the scope name
// ScopePrefix describes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the absolute cgroups paths to join.
@ -36,7 +34,7 @@ type Cgroup struct {
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
@ -60,19 +58,19 @@ type Resources struct {
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
CpuShares uint64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
CpuPeriod uint64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_quota"`
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_period"`
CpuRtPeriod uint64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@ -95,7 +93,7 @@ type Resources struct {
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per divice, bytes per second.
// IO write rate limit per cgroup per device, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
@ -114,11 +112,11 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *int64 `json:"memory_swappiness"`
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
NetClsClassid uint32 `json:"net_cls_classid_u"`
}

View file

@ -1,6 +0,0 @@
// +build !windows,!linux,!freebsd
package configs
type Cgroup struct {
}

View file

@ -7,7 +7,9 @@ import (
"os/exec"
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type Rlimit struct {
@ -85,11 +87,6 @@ type Config struct {
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
@ -117,8 +114,8 @@ type Config struct {
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *Capabilities `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
@ -148,10 +145,6 @@ type Config struct {
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int `json:"oom_score_adj"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
@ -187,6 +180,17 @@ type Config struct {
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
}
type Hooks struct {
@ -201,6 +205,19 @@ type Hooks struct {
Poststop []Hook
}
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
// Effective is the set of capabilities checked by the kernel.
Effective []string
// Inheritable is the capabilities preserved across execve.
Inheritable []string
// Permitted is the limiting superset for effective capabilities.
Permitted []string
// Ambient is the ambient set of capabilities that are kept.
Ambient []string
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
@ -248,13 +265,7 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
// HookState is the payload provided to a hook on execution.
type HookState struct {
Version string `json:"ociVersion"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
BundlePath string `json:"bundlePath"`
}
type HookState specs.State
type Hook interface {
// Run executes the hook with the provided state.
@ -300,29 +311,38 @@ func (c Command) Run(s HookState) error {
if err != nil {
return err
}
var stdout, stderr bytes.Buffer
cmd := exec.Cmd{
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Stdout: &stdout,
Stderr: &stderr,
}
if err := cmd.Start(); err != nil {
return err
}
errC := make(chan error, 1)
go func() {
out, err := cmd.CombinedOutput()
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("%s: %s", err, out)
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
}()
var timerCh <-chan time.Time
if c.Timeout != nil {
select {
case err := <-errC:
return err
case <-time.After(*c.Timeout):
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
timer := time.NewTimer(*c.Timeout)
defer timer.Stop()
timerCh = timer.C
}
select {
case err := <-errC:
return err
case <-timerCh:
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
return <-errC
}

View file

@ -0,0 +1,61 @@
package configs
import "fmt"
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View file

@ -0,0 +1,130 @@
package configs
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
)
func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("../sample_configs", name))
if err != nil {
return nil, err
}
defer f.Close()
var container *Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
// Check that a config doesn't contain extra fields
var configMap, abstractMap map[string]interface{}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
return nil, err
}
configData, err := json.Marshal(&container)
if err != nil {
return nil, err
}
if err := json.Unmarshal(configData, &configMap); err != nil {
return nil, err
}
for k := range configMap {
delete(abstractMap, k)
}
if len(abstractMap) != 0 {
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
}
return container, nil
}
func TestRemoveNamespace(t *testing.T) {
ns := Namespaces{
{Type: NEWNET},
}
if !ns.Remove(NEWNET) {
t.Fatal("NEWNET was not removed")
}
if len(ns) != 0 {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
}
}
func TestHostRootUIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostRootUID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
}
}
func TestHostRootUIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
UidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostRootUID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
}
}
func TestHostRootGIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostRootGID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
}
}
func TestHostRootGIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
GidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostRootGID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
}
}

View file

@ -0,0 +1,191 @@
package configs_test
import (
"encoding/json"
"fmt"
"os"
"reflect"
"testing"
"time"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestUnmarshalHooks(t *testing.T) {
timeout := time.Second
prestartCmd := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
prestart, err := json.Marshal(prestartCmd.Command)
if err != nil {
t.Fatal(err)
}
hook := configs.Hooks{}
err = hook.UnmarshalJSON([]byte(fmt.Sprintf(`{"Prestart" :[%s]}`, prestart)))
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(hook.Prestart[0], prestartCmd) {
t.Errorf("Expected prestart to equal %+v but it was %+v",
prestartCmd, hook.Prestart[0])
}
}
func TestUnmarshalHooksWithInvalidData(t *testing.T) {
hook := configs.Hooks{}
err := hook.UnmarshalJSON([]byte(`{invalid-json}`))
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestMarshalHooks(t *testing.T) {
timeout := time.Second
prestartCmd := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
hook := configs.Hooks{
Prestart: []configs.Hook{prestartCmd},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
h := `{"poststart":null,"poststop":null,"prestart":[{"path":"/var/vcap/hooks/prestart","args":["--pid=123"],"env":["FOO=BAR"],"dir":"/var/vcap","timeout":1000000000}]}`
if string(hooks) != h {
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
}
}
func TestMarshalUnmarshalHooks(t *testing.T) {
timeout := time.Second
prestart := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
hook := configs.Hooks{
Prestart: []configs.Hook{prestart},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
umMhook := configs.Hooks{}
err = umMhook.UnmarshalJSON(hooks)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(umMhook.Prestart[0], prestart) {
t.Errorf("Expected hooks to be equal after mashaling -> unmarshaling them: %+v, %+v", umMhook.Prestart[0], prestart)
}
}
func TestMarshalHooksWithUnexpectedType(t *testing.T) {
fHook := configs.NewFunctionHook(func(configs.HookState) error {
return nil
})
hook := configs.Hooks{
Prestart: []configs.Hook{fHook},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
h := `{"poststart":null,"poststop":null,"prestart":null}`
if string(hooks) != h {
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
}
}
func TestFuncHookRun(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
fHook := configs.NewFunctionHook(func(s configs.HookState) error {
if !reflect.DeepEqual(state, s) {
t.Errorf("Expected state %+v to equal %+v", state, s)
}
return nil
})
fHook.Run(state)
}
func TestCommandHookRun(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
timeout := time.Second
cmdHook := configs.NewCommandHook(configs.Command{
Path: os.Args[0],
Args: []string{os.Args[0], "-test.run=TestHelperProcess"},
Env: []string{"FOO=BAR"},
Dir: "/",
Timeout: &timeout,
})
err := cmdHook.Run(state)
if err != nil {
t.Errorf(fmt.Sprintf("Expected error to not occur but it was %+v", err))
}
}
func TestCommandHookRunTimeout(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
timeout := (10 * time.Millisecond)
cmdHook := configs.NewCommandHook(configs.Command{
Path: os.Args[0],
Args: []string{os.Args[0], "-test.run=TestHelperProcessWithTimeout"},
Env: []string{"FOO=BAR"},
Dir: "/",
Timeout: &timeout,
})
err := cmdHook.Run(state)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestHelperProcess(*testing.T) {
fmt.Println("Helper Process")
os.Exit(0)
}
func TestHelperProcessWithTimeout(*testing.T) {
time.Sleep(time.Second)
}

View file

@ -1,51 +0,0 @@
// +build freebsd linux
package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View file

@ -0,0 +1,3 @@
package configs
// All current tests are for Unix-specific functionality

View file

@ -1,4 +1,4 @@
// +build linux freebsd
// +build linux
package configs
@ -107,19 +107,5 @@ var (
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work. Because java
// Insists on there being a /dev/fuse
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
)

View file

@ -0,0 +1,7 @@
package configs
type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}

View file

@ -1,5 +1,11 @@
package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
@ -22,6 +28,9 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
import (
@ -22,8 +20,8 @@ var (
supportedNamespaces = make(map[NamespaceType]bool)
)
// nsToFile converts the namespace type to its filename
func nsToFile(ns NamespaceType) string {
// NsName converts the namespace type to its filename
func NsName(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
@ -50,7 +48,7 @@ func IsNamespaceSupported(ns NamespaceType) bool {
if ok {
return supported
}
nsFile := nsToFile(ns)
nsFile := NsName(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWUSER, // Keep user NS always first, don't move it.
NEWIPC,
NEWUTS,
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
@ -81,10 +79,7 @@ type Namespace struct {
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {

View file

@ -2,19 +2,19 @@
package configs
import "syscall"
import "golang.org/x/sys/unix"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct

View file

@ -4,12 +4,10 @@ package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View file

@ -1,4 +1,4 @@
// +build !linux,!freebsd
// +build !linux
package configs

View file

@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func hasIDMapping(id int, mappings []configs.IDMap) bool {
for _, m := range mappings {
if id >= m.ContainerID && id < m.ContainerID+m.Size {
return true
}
}
return false
}
func rootlessMappings(config *configs.Config) error {
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
}
if len(config.UidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") {
var uid int
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(uid, config.UidMappings) {
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
}
}
if strings.HasPrefix(opt, "gid=") {
var gid int
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(gid, config.GidMappings) {
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
}
}
}
}
return nil
}

View file

@ -0,0 +1,159 @@
package validate
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func init() {
geteuid = func() int { return 1337 }
getegid = func() int { return 7331 }
}
func rootlessConfig() *configs.Config {
return &configs.Config{
Rootfs: "/var",
Rootless: true,
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
UidMappings: []configs.IDMap{
{
HostID: geteuid(),
ContainerID: 0,
Size: 1,
},
},
GidMappings: []configs.IDMap{
{
HostID: getegid(),
ContainerID: 0,
Size: 1,
},
},
}
}
func TestValidateRootless(t *testing.T) {
validator := New()
config := rootlessConfig()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
/* rootlessMappings() */
func TestValidateRootlessUserns(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Namespaces = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if user namespaces not set")
}
}
func TestValidateRootlessMappingUid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.UidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no uid mappings provided")
}
}
func TestValidateRootlessMappingGid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.GidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no gid mappings provided")
}
}
/* rootlessMount() */
func TestValidateRootlessMountUid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "uid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting uid=5 in mount options")
}
config.Mounts[0].Data = "uid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err)
}
config.Mounts[0].Data = "uid=2"
config.UidMappings[0].Size = 10
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting uid=2 in mount options and UidMapping[0].size is 10")
}
config.Mounts[0].Data = "uid=20"
config.UidMappings[0].Size = 10
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting uid=20 in mount options and UidMapping[0].size is 10")
}
}
func TestValidateRootlessMountGid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "gid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting gid=5 in mount options")
}
config.Mounts[0].Data = "gid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err)
}
config.Mounts[0].Data = "gid=5"
config.GidMappings[0].Size = 10
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting gid=5 in mount options and GidMapping[0].size is 10")
}
config.Mounts[0].Data = "gid=11"
config.GidMappings[0].Size = 10
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10")
}
}

View file

@ -7,6 +7,8 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux"
)
type Validator interface {
@ -39,12 +41,26 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil {
return err
}
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}
// rootfs validates if the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
}
return err
}
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
@ -80,6 +96,10 @@ func (v *ConfigValidator) security(config *configs.Config) error {
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.GetEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}
@ -121,6 +141,11 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
}
if strings.HasPrefix(s, "net.") {
if config.Namespaces.Contains(configs.NEWNET) {
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
if err := checkHostNs(s, path); err != nil {
return err
}
}
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
@ -131,3 +156,57 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
return nil
}
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
}
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {
return false, err
}
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
}
// checkHostNs checks whether network sysctl is used in host namespace.
func checkHostNs(sysctlConfig string, path string) error {
var currentProcessNetns = "/proc/self/ns/net"
// readlink on the current processes network namespace
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
if err != nil {
return fmt.Errorf("read soft link %q error", currentProcessNetns)
}
// First check if the provided path is a symbolic link
symLink, err := isSymbolicLink(path)
if err != nil {
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
}
if symLink == false {
// The provided namespace is not a symbolic link,
// it is not the host namespace.
return nil
}
// readlink on the path provided in the struct
destOfContainer, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("read soft link %q error", path)
}
if destOfContainer == destOfCurrentProcess {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
}
return nil
}

View file

@ -0,0 +1,267 @@
package validate_test
import (
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
func TestValidate(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateWithInvalidRootfs(t *testing.T) {
dir := "rootfs"
os.Symlink("/var", dir)
defer os.Remove(dir)
config := &configs.Config{
Rootfs: dir,
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateNetworkWithoutNETNamespace(t *testing.T) {
network := &configs.Network{Type: "loopback"}
config := &configs.Config{
Rootfs: "/var",
Namespaces: []configs.Namespace{},
Networks: []*configs.Network{network},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateNetworkRoutesWithoutNETNamespace(t *testing.T) {
route := &configs.Route{Gateway: "255.255.255.0"}
config := &configs.Config{
Rootfs: "/var",
Namespaces: []configs.Namespace{},
Routes: []*configs.Route{route},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateHostname(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Hostname: "runc",
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUTS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateHostnameWithoutUTSNamespace(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Hostname: "runc",
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSecurityWithMaskPaths(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
MaskPaths: []string{"/proc/kcore"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWNS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateSecurityWithROPaths(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
ReadonlyPaths: []string{"/proc/sys"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWNS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateSecurityWithoutNEWNS(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
MaskPaths: []string{"/proc/kcore"},
ReadonlyPaths: []string{"/proc/sys"},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateUsernamespace(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
config := &configs.Config{
Rootfs: "/var",
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("expected error to not occur %+v", err)
}
}
func TestValidateUsernamespaceWithoutUserNS(t *testing.T) {
uidMap := configs.IDMap{ContainerID: 123}
config := &configs.Config{
Rootfs: "/var",
UidMappings: []configs.IDMap{uidMap},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSysctl(t *testing.T) {
sysctl := map[string]string{
"fs.mqueue.ctl": "ctl",
"net.ctl": "ctl",
"kernel.ctl": "ctl",
}
for k, v := range sysctl {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{k: v},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
}
func TestValidateValidSysctl(t *testing.T) {
sysctl := map[string]string{
"fs.mqueue.ctl": "ctl",
"net.ctl": "ctl",
"kernel.msgmax": "ctl",
}
for k, v := range sysctl {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{k: v},
Namespaces: []configs.Namespace{
{
Type: configs.NEWNET,
},
{
Type: configs.NEWIPC,
},
},
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur with {%s=%s} but got: %q", k, v, err)
}
}
}
func TestValidateSysctlWithSameNs(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{"net.ctl": "ctl"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{
Type: configs.NEWNET,
Path: "/proc/self/ns/net",
},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSysctlWithoutNETNamespace(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{"net.ctl": "ctl"},
Namespaces: []configs.Namespace{},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}

View file

@ -1,15 +0,0 @@
package libcontainer
import "io"
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriter
io.Closer
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
}

View file

@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View file

@ -1,145 +1,41 @@
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/opencontainers/runc/libcontainer/label"
"golang.org/x/sys/unix"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux psuedo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
func mountConsole(slavePath string) error {
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
func dupStdio(slavePath string) error {
fd, err := unix.Open(slavePath, unix.O_RDWR, 0)
if err != nil {
return err
return &os.PathError{
Op: "open",
Path: slavePath,
Err: err,
}
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup3(fd, i, 0); err != nil {
if err := unix.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := syscall.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

View file

@ -1,11 +0,0 @@
package libcontainer
import (
"errors"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View file

@ -1,30 +0,0 @@
package libcontainer
// NewConsole returns an initalized console that can be used within a container
func NewConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows psuedo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View file

@ -15,20 +15,16 @@ import (
type Status int
const (
// Created is the status that denotes the container exists but has not been run yet
// Created is the status that denotes the container exists but has not been run yet.
Created Status = iota
// Created is the status that denotes the container exists and is running.
// Running is the status that denotes the container exists and is running.
Running
// Pausing is the status that denotes the container exists, it is in the process of being paused.
Pausing
// Paused is the status that denotes the container exists, but all its processes are paused.
Paused
// Destroyed is the status that denotes the container does not exist.
Destroyed
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
)
func (s Status) String() string {
@ -41,8 +37,8 @@ func (s Status) String() string {
return "pausing"
case Paused:
return "paused"
case Destroyed:
return "destroyed"
case Stopped:
return "stopped"
default:
return "unknown"
}
@ -58,7 +54,7 @@ type BaseState struct {
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime string `json:"init_process_start"`
InitProcessStartTime uint64 `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
@ -79,14 +75,14 @@ type BaseContainer interface {
// Returns the current status of the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
State() (*State, error)
// Returns the current config of the container.
@ -95,7 +91,7 @@ type BaseContainer interface {
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
@ -105,7 +101,7 @@ type BaseContainer interface {
// Returns statistics for the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
@ -114,31 +110,57 @@ type BaseContainer interface {
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// Systemerror - System error.
// SystemError - System error.
Start(process *Process) (err error)
// Destroys the container after killing all running processes.
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container, if its in a valid state, after killing any
// remaining running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// Running containers must first be stopped using Signal(..).
// Paused containers must first be resumed using Resume(..).
//
// errors:
// Systemerror - System error.
// ContainerNotStopped - Container is still running,
// ContainerPaused - Container is paused,
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// If all is specified the signal is sent to all processes in the container
// including the initial process.
//
// errors:
// Systemerror - System error.
Signal(s os.Signal) error
// SystemError - System error.
Signal(s os.Signal, all bool) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,345 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
)
type mockCgroupManager struct {
pids []int
allPids []int
stats *cgroups.Stats
paths map[string]string
}
type mockIntelRdtManager struct {
stats *intelrdt.Stats
path string
}
func (m *mockCgroupManager) GetPids() ([]int, error) {
return m.pids, nil
}
func (m *mockCgroupManager) GetAllPids() ([]int, error) {
return m.allPids, nil
}
func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) {
return m.stats, nil
}
func (m *mockCgroupManager) Apply(pid int) error {
return nil
}
func (m *mockCgroupManager) Set(container *configs.Config) error {
return nil
}
func (m *mockCgroupManager) Destroy() error {
return nil
}
func (m *mockCgroupManager) GetPaths() map[string]string {
return m.paths
}
func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
return nil
}
func (m *mockIntelRdtManager) Apply(pid int) error {
return nil
}
func (m *mockIntelRdtManager) GetStats() (*intelrdt.Stats, error) {
return m.stats, nil
}
func (m *mockIntelRdtManager) Destroy() error {
return nil
}
func (m *mockIntelRdtManager) GetPath() string {
return m.path
}
func (m *mockIntelRdtManager) Set(container *configs.Config) error {
return nil
}
type mockProcess struct {
_pid int
started uint64
}
func (m *mockProcess) terminate() error {
return nil
}
func (m *mockProcess) pid() int {
return m._pid
}
func (m *mockProcess) startTime() (uint64, error) {
return m.started, nil
}
func (m *mockProcess) start() error {
return nil
}
func (m *mockProcess) wait() (*os.ProcessState, error) {
return nil, nil
}
func (m *mockProcess) signal(_ os.Signal) error {
return nil
}
func (m *mockProcess) externalDescriptors() []string {
return []string{}
}
func (m *mockProcess) setExternalDescriptors(newFds []string) {
}
func TestGetContainerPids(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{allPids: []int{1, 2, 3}},
}
pids, err := container.Processes()
if err != nil {
t.Fatal(err)
}
for i, expected := range []int{1, 2, 3} {
if pids[i] != expected {
t.Fatalf("expected pid %d but received %d", expected, pids[i])
}
}
}
func TestGetContainerStats(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f;1=f0",
},
},
}
stats, err := container.Stats()
if err != nil {
t.Fatal(err)
}
if stats.CgroupStats == nil {
t.Fatal("cgroup stats are nil")
}
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
}
if intelrdt.IsEnabled() {
if stats.IntelRdtStats == nil {
t.Fatal("intel rdt stats are nil")
}
if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" {
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
}
}
}
func TestGetContainerState(t *testing.T) {
var (
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid)
expectedIntelRdtPath = "/sys/fs/resctrl/myid"
)
container := &linuxContainer{
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET, Path: expectedNetworkPath},
{Type: configs.NEWUTS},
// emulate host for IPC
//{Type: configs.NEWIPC},
},
},
initProcess: &mockProcess{
_pid: pid,
started: 10,
},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
paths: map[string]string{
"memory": expectedMemoryPath,
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f0;1=f",
},
path: expectedIntelRdtPath,
},
}
container.state = &createdState{c: container}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != 10 {
t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime)
}
paths := state.CgroupPaths
if paths == nil {
t.Fatal("cgroup paths should not be nil")
}
if memPath := paths["memory"]; memPath != expectedMemoryPath {
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
}
if intelrdt.IsEnabled() {
intelRdtPath := state.IntelRdtPath
if intelRdtPath == "" {
t.Fatal("intel rdt path should not be empty")
}
if intelRdtPath != expectedIntelRdtPath {
t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
}
}
for _, ns := range container.config.Namespaces {
path := state.NamespacePaths[ns.Type]
if path == "" {
t.Fatalf("expected non nil namespace path for %s", ns.Type)
}
if ns.Type == configs.NEWNET {
if path != expectedNetworkPath {
t.Fatalf("expected path %q but received %q", expectedNetworkPath, path)
}
} else {
file := ""
switch ns.Type {
case configs.NEWNET:
file = "net"
case configs.NEWNS:
file = "mnt"
case configs.NEWPID:
file = "pid"
case configs.NEWIPC:
file = "ipc"
case configs.NEWUSER:
file = "user"
case configs.NEWUTS:
file = "uts"
}
expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file)
if expected != path {
t.Fatalf("expected path %q but received %q", expected, path)
}
}
}
}
func TestGetContainerStateAfterUpdate(t *testing.T) {
var (
pid = os.Getpid()
)
stat, err := system.Stat(pid)
if err != nil {
t.Fatal(err)
}
rootDir, err := ioutil.TempDir("", "TestGetContainerStateAfterUpdate")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(rootDir)
container := &linuxContainer{
root: rootDir,
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
},
Cgroups: &configs.Cgroup{
Resources: &configs.Resources{
Memory: 1024,
},
},
},
initProcess: &mockProcess{
_pid: pid,
started: stat.StartTime,
},
cgroupManager: &mockCgroupManager{},
}
container.state = &createdState{c: container}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != stat.StartTime {
t.Fatalf("expected process start time %d but received %d", stat.StartTime, state.InitProcessStartTime)
}
if state.Config.Cgroups.Resources.Memory != 1024 {
t.Fatalf("expected Memory to be 1024 but received %q", state.Config.Cgroups.Memory)
}
// Set initProcessStartTime so we fake to be running
container.initProcessStartTime = state.InitProcessStartTime
container.state = &runningState{c: container}
newConfig := container.Config()
newConfig.Cgroups.Resources.Memory = 2048
if err := container.Set(newConfig); err != nil {
t.Fatal(err)
}
state, err = container.State()
if err != nil {
t.Fatal(err)
}
if state.Config.Cgroups.Resources.Memory != 2048 {
t.Fatalf("expected Memory to be 2048 but received %q", state.Config.Cgroups.Memory)
}
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package libcontainer
// cgroup restoring strategy provided by criu
@ -25,13 +23,18 @@ type VethPairName struct {
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // directory for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd string // fd for feedback when lazy server is ready
}

View file

@ -1,6 +0,0 @@
package libcontainer
// TODO Windows: This can ultimately be entirely factored out as criu is
// a Unix concept not relevant on Windows.
type CriuOpts struct {
}

View file

@ -0,0 +1,2 @@
gen: criurpc.proto
protoc --go_out=. criurpc.proto

View file

@ -12,6 +12,7 @@ It has these top-level messages:
CriuPageServerInfo
CriuVethPair
ExtMountMap
JoinNamespace
InheritFd
CgroupRoot
UnixSk
@ -22,21 +23,30 @@ It has these top-level messages:
CriuFeatures
CriuReq
CriuResp
CriuVersion
*/
package criurpc
import proto "github.com/golang/protobuf/proto"
import fmt "fmt"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
type CriuCgMode int32
const (
CriuCgMode_IGNORE CriuCgMode = 0
CriuCgMode_NONE CriuCgMode = 1
CriuCgMode_CG_NONE CriuCgMode = 1
CriuCgMode_PROPS CriuCgMode = 2
CriuCgMode_SOFT CriuCgMode = 3
CriuCgMode_FULL CriuCgMode = 4
@ -46,7 +56,7 @@ const (
var CriuCgMode_name = map[int32]string{
0: "IGNORE",
1: "NONE",
1: "CG_NONE",
2: "PROPS",
3: "SOFT",
4: "FULL",
@ -55,7 +65,7 @@ var CriuCgMode_name = map[int32]string{
}
var CriuCgMode_value = map[string]int32{
"IGNORE": 0,
"NONE": 1,
"CG_NONE": 1,
"PROPS": 2,
"SOFT": 3,
"FULL": 4,
@ -79,6 +89,7 @@ func (x *CriuCgMode) UnmarshalJSON(data []byte) error {
*x = CriuCgMode(value)
return nil
}
func (CriuCgMode) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
type CriuReqType int32
@ -93,19 +104,21 @@ const (
CriuReqType_CPUINFO_DUMP CriuReqType = 7
CriuReqType_CPUINFO_CHECK CriuReqType = 8
CriuReqType_FEATURE_CHECK CriuReqType = 9
CriuReqType_VERSION CriuReqType = 10
)
var CriuReqType_name = map[int32]string{
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
10: "VERSION",
}
var CriuReqType_value = map[string]int32{
"EMPTY": 0,
@ -118,6 +131,7 @@ var CriuReqType_value = map[string]int32{
"CPUINFO_DUMP": 7,
"CPUINFO_CHECK": 8,
"FEATURE_CHECK": 9,
"VERSION": 10,
}
func (x CriuReqType) Enum() *CriuReqType {
@ -136,6 +150,7 @@ func (x *CriuReqType) UnmarshalJSON(data []byte) error {
*x = CriuReqType(value)
return nil
}
func (CriuReqType) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
type CriuPageServerInfo struct {
Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"`
@ -145,9 +160,10 @@ type CriuPageServerInfo struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
func (m *CriuPageServerInfo) GetAddress() string {
if m != nil && m.Address != nil {
@ -178,14 +194,15 @@ func (m *CriuPageServerInfo) GetFd() int32 {
}
type CriuVethPair struct {
IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"`
IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (*CriuVethPair) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
func (m *CriuVethPair) GetIfIn() string {
if m != nil && m.IfIn != nil {
@ -207,9 +224,10 @@ type ExtMountMap struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (*ExtMountMap) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} }
func (m *ExtMountMap) GetKey() string {
if m != nil && m.Key != nil {
@ -225,15 +243,49 @@ func (m *ExtMountMap) GetVal() string {
return ""
}
type JoinNamespace struct {
Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"`
NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"`
ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *JoinNamespace) Reset() { *m = JoinNamespace{} }
func (m *JoinNamespace) String() string { return proto.CompactTextString(m) }
func (*JoinNamespace) ProtoMessage() {}
func (*JoinNamespace) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} }
func (m *JoinNamespace) GetNs() string {
if m != nil && m.Ns != nil {
return *m.Ns
}
return ""
}
func (m *JoinNamespace) GetNsFile() string {
if m != nil && m.NsFile != nil {
return *m.NsFile
}
return ""
}
func (m *JoinNamespace) GetExtraOpt() string {
if m != nil && m.ExtraOpt != nil {
return *m.ExtraOpt
}
return ""
}
type InheritFd struct {
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (*InheritFd) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} }
func (m *InheritFd) GetKey() string {
if m != nil && m.Key != nil {
@ -255,9 +307,10 @@ type CgroupRoot struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (*CgroupRoot) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} }
func (m *CgroupRoot) GetCtrl() string {
if m != nil && m.Ctrl != nil {
@ -278,9 +331,10 @@ type UnixSk struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (*UnixSk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} }
func (m *UnixSk) GetInode() uint32 {
if m != nil && m.Inode != nil {
@ -290,51 +344,62 @@ func (m *UnixSk) GetInode() uint32 {
}
type CriuOpts struct {
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns" json:"empty_ns,omitempty"`
NoSeccomp *bool `protobuf:"varint,39,opt,name=no_seccomp" json:"no_seccomp,omitempty"`
XXX_unrecognized []byte `json:"-"`
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"`
JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"`
CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"`
CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"`
CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"`
FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"`
Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"`
TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"`
WeakSysctls *bool `protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"`
LazyPages *bool `protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"`
OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (*CriuOpts) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} }
const Default_CriuOpts_LogLevel int32 = 2
const Default_CriuOpts_CpuCap uint32 = 4294967295
@ -606,9 +671,79 @@ func (m *CriuOpts) GetEmptyNs() uint32 {
return 0
}
func (m *CriuOpts) GetNoSeccomp() bool {
if m != nil && m.NoSeccomp != nil {
return *m.NoSeccomp
func (m *CriuOpts) GetJoinNs() []*JoinNamespace {
if m != nil {
return m.JoinNs
}
return nil
}
func (m *CriuOpts) GetCgroupProps() string {
if m != nil && m.CgroupProps != nil {
return *m.CgroupProps
}
return ""
}
func (m *CriuOpts) GetCgroupPropsFile() string {
if m != nil && m.CgroupPropsFile != nil {
return *m.CgroupPropsFile
}
return ""
}
func (m *CriuOpts) GetCgroupDumpController() []string {
if m != nil {
return m.CgroupDumpController
}
return nil
}
func (m *CriuOpts) GetFreezeCgroup() string {
if m != nil && m.FreezeCgroup != nil {
return *m.FreezeCgroup
}
return ""
}
func (m *CriuOpts) GetTimeout() uint32 {
if m != nil && m.Timeout != nil {
return *m.Timeout
}
return 0
}
func (m *CriuOpts) GetTcpSkipInFlight() bool {
if m != nil && m.TcpSkipInFlight != nil {
return *m.TcpSkipInFlight
}
return false
}
func (m *CriuOpts) GetWeakSysctls() bool {
if m != nil && m.WeakSysctls != nil {
return *m.WeakSysctls
}
return false
}
func (m *CriuOpts) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
func (m *CriuOpts) GetStatusFd() int32 {
if m != nil && m.StatusFd != nil {
return *m.StatusFd
}
return 0
}
func (m *CriuOpts) GetOrphanPtsMaster() bool {
if m != nil && m.OrphanPtsMaster != nil {
return *m.OrphanPtsMaster
}
return false
}
@ -618,9 +753,10 @@ type CriuDumpResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (*CriuDumpResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} }
func (m *CriuDumpResp) GetRestored() bool {
if m != nil && m.Restored != nil {
@ -634,9 +770,10 @@ type CriuRestoreResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (*CriuRestoreResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} }
func (m *CriuRestoreResp) GetPid() int32 {
if m != nil && m.Pid != nil {
@ -651,9 +788,10 @@ type CriuNotify struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (*CriuNotify) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} }
func (m *CriuNotify) GetScript() string {
if m != nil && m.Script != nil {
@ -673,13 +811,15 @@ func (m *CriuNotify) GetPid() int32 {
// List of features which can queried via
// CRIU_REQ_TYPE__FEATURE_CHECK
type CriuFeatures struct {
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track" json:"mem_track,omitempty"`
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"`
LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (*CriuFeatures) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} }
func (m *CriuFeatures) GetMemTrack() bool {
if m != nil && m.MemTrack != nil {
@ -688,15 +828,22 @@ func (m *CriuFeatures) GetMemTrack() bool {
return false
}
func (m *CriuFeatures) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
type CriuReq struct {
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"`
//
// When set service won't close the connection but
// will wait for more req-s to appear. Works not
// for all request types.
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"`
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"`
//
// 'features' can be used to query which features
// are supported by the installed criu/kernel
@ -705,9 +852,10 @@ type CriuReq struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (*CriuReq) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} }
func (m *CriuReq) GetType() CriuReqType {
if m != nil && m.Type != nil {
@ -751,14 +899,17 @@ type CriuResp struct {
Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"`
Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"`
Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"`
CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"`
Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (*CriuResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} }
func (m *CriuResp) GetType() CriuReqType {
if m != nil && m.Type != nil {
@ -816,7 +967,212 @@ func (m *CriuResp) GetFeatures() *CriuFeatures {
return nil
}
func (m *CriuResp) GetCrErrmsg() string {
if m != nil && m.CrErrmsg != nil {
return *m.CrErrmsg
}
return ""
}
func (m *CriuResp) GetVersion() *CriuVersion {
if m != nil {
return m.Version
}
return nil
}
// Answer for criu_req_type.VERSION requests
type CriuVersion struct {
Major *int32 `protobuf:"varint,1,req,name=major" json:"major,omitempty"`
Minor *int32 `protobuf:"varint,2,req,name=minor" json:"minor,omitempty"`
Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"`
Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"`
Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"`
Name *string `protobuf:"bytes,6,opt,name=name" json:"name,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVersion) Reset() { *m = CriuVersion{} }
func (m *CriuVersion) String() string { return proto.CompactTextString(m) }
func (*CriuVersion) ProtoMessage() {}
func (*CriuVersion) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} }
func (m *CriuVersion) GetMajor() int32 {
if m != nil && m.Major != nil {
return *m.Major
}
return 0
}
func (m *CriuVersion) GetMinor() int32 {
if m != nil && m.Minor != nil {
return *m.Minor
}
return 0
}
func (m *CriuVersion) GetGitid() string {
if m != nil && m.Gitid != nil {
return *m.Gitid
}
return ""
}
func (m *CriuVersion) GetSublevel() int32 {
if m != nil && m.Sublevel != nil {
return *m.Sublevel
}
return 0
}
func (m *CriuVersion) GetExtra() int32 {
if m != nil && m.Extra != nil {
return *m.Extra
}
return 0
}
func (m *CriuVersion) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func init() {
proto.RegisterType((*CriuPageServerInfo)(nil), "criu_page_server_info")
proto.RegisterType((*CriuVethPair)(nil), "criu_veth_pair")
proto.RegisterType((*ExtMountMap)(nil), "ext_mount_map")
proto.RegisterType((*JoinNamespace)(nil), "join_namespace")
proto.RegisterType((*InheritFd)(nil), "inherit_fd")
proto.RegisterType((*CgroupRoot)(nil), "cgroup_root")
proto.RegisterType((*UnixSk)(nil), "unix_sk")
proto.RegisterType((*CriuOpts)(nil), "criu_opts")
proto.RegisterType((*CriuDumpResp)(nil), "criu_dump_resp")
proto.RegisterType((*CriuRestoreResp)(nil), "criu_restore_resp")
proto.RegisterType((*CriuNotify)(nil), "criu_notify")
proto.RegisterType((*CriuFeatures)(nil), "criu_features")
proto.RegisterType((*CriuReq)(nil), "criu_req")
proto.RegisterType((*CriuResp)(nil), "criu_resp")
proto.RegisterType((*CriuVersion)(nil), "criu_version")
proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value)
proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value)
}
func init() { proto.RegisterFile("criurpc.proto", fileDescriptor0) }
var fileDescriptor0 = []byte{
// 1781 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0xdd, 0x72, 0x5b, 0xb7,
0x11, 0x0e, 0x29, 0xfe, 0x1c, 0x82, 0x3f, 0xa6, 0x10, 0xdb, 0x81, 0x93, 0xda, 0x62, 0xe8, 0x28,
0x51, 0x15, 0x97, 0x4d, 0x58, 0x3b, 0xae, 0x33, 0xed, 0x85, 0x47, 0x22, 0x5d, 0x36, 0x92, 0xc8,
0x01, 0x25, 0xcf, 0xe4, 0x0a, 0x73, 0x74, 0x0e, 0x48, 0xc1, 0x3c, 0x7f, 0x05, 0x40, 0x45, 0xf2,
0x83, 0xf4, 0x29, 0xfa, 0x0c, 0x7d, 0x84, 0xbe, 0x4e, 0x6f, 0x3b, 0xbb, 0x00, 0x65, 0x29, 0xc9,
0xb4, 0xbd, 0xc3, 0x7e, 0x58, 0x00, 0xbb, 0xfb, 0xed, 0x0f, 0x48, 0x3b, 0xd2, 0x6a, 0xad, 0x8b,
0x68, 0x50, 0xe8, 0xdc, 0xe6, 0xfd, 0x25, 0x79, 0x00, 0x80, 0x28, 0xc2, 0xa5, 0x14, 0x46, 0xea,
0x4b, 0xa9, 0x85, 0xca, 0x16, 0x39, 0x65, 0xa4, 0x1e, 0xc6, 0xb1, 0x96, 0xc6, 0xb0, 0x52, 0xaf,
0xb4, 0xd7, 0xe0, 0x1b, 0x91, 0x52, 0x52, 0x29, 0x72, 0x6d, 0x59, 0xb9, 0x57, 0xda, 0xab, 0x72,
0x5c, 0xd3, 0x2e, 0xd9, 0x2a, 0x54, 0xcc, 0xb6, 0x10, 0x82, 0x25, 0xed, 0x90, 0xf2, 0x22, 0x66,
0x15, 0x04, 0xca, 0x8b, 0xb8, 0xff, 0x27, 0xd2, 0xc1, 0x87, 0x2e, 0xa5, 0xbd, 0x10, 0x45, 0xa8,
0x34, 0xfd, 0x98, 0x54, 0xd5, 0x42, 0xa8, 0x8c, 0x95, 0x7a, 0xe5, 0xbd, 0x06, 0xaf, 0xa8, 0xc5,
0x24, 0xa3, 0x0f, 0x48, 0x4d, 0x2d, 0x44, 0xbe, 0x86, 0xeb, 0x01, 0xad, 0xaa, 0xc5, 0x74, 0x6d,
0xfb, 0x7f, 0x20, 0x6d, 0x79, 0x65, 0x45, 0x9a, 0xaf, 0x33, 0x2b, 0xd2, 0xb0, 0x80, 0x07, 0x57,
0xf2, 0xda, 0x1f, 0x85, 0x25, 0x20, 0x97, 0x61, 0xe2, 0x8f, 0xc1, 0xb2, 0xff, 0x96, 0x74, 0xde,
0xe5, 0x2a, 0x13, 0x59, 0x98, 0x4a, 0x53, 0x84, 0x91, 0x04, 0xa3, 0x32, 0xe3, 0x0f, 0x95, 0x33,
0x43, 0x3f, 0x21, 0xf5, 0xcc, 0x88, 0x85, 0x4a, 0xa4, 0x3f, 0x57, 0xcb, 0xcc, 0x58, 0x25, 0x92,
0x7e, 0x46, 0x1a, 0xf2, 0xca, 0xea, 0x50, 0xe4, 0x85, 0x45, 0xaf, 0x1a, 0x3c, 0x40, 0x60, 0x5a,
0xd8, 0xfe, 0x80, 0x10, 0x95, 0x5d, 0x48, 0xad, 0xac, 0x58, 0xc4, 0xbf, 0x62, 0x89, 0x73, 0x1d,
0x2e, 0x74, 0xae, 0xbf, 0x20, 0xcd, 0x68, 0xa9, 0xf3, 0x75, 0x21, 0x74, 0x9e, 0x5b, 0x88, 0x5f,
0x64, 0x75, 0xe2, 0xc3, 0x8a, 0x6b, 0x8c, 0x69, 0x68, 0x2f, 0xbc, 0x15, 0xb8, 0xee, 0xef, 0x90,
0xfa, 0x3a, 0x53, 0x57, 0xc2, 0xac, 0xe8, 0x7d, 0x52, 0x55, 0x59, 0x1e, 0x4b, 0x7c, 0xa5, 0xcd,
0x9d, 0xd0, 0xff, 0x57, 0x9b, 0x34, 0x30, 0xa6, 0x79, 0x61, 0x0d, 0xed, 0x93, 0xb6, 0x4a, 0xc3,
0xa5, 0x34, 0x22, 0x56, 0x5a, 0x2c, 0x62, 0xd4, 0xad, 0xf2, 0xa6, 0x03, 0x0f, 0x95, 0x1e, 0xc7,
0x1b, 0x9a, 0xca, 0x1f, 0x68, 0x7a, 0x4a, 0xda, 0x89, 0x0c, 0x2f, 0xa5, 0xd0, 0xeb, 0x2c, 0x53,
0xd9, 0x12, 0x9d, 0x0d, 0x78, 0x0b, 0x41, 0xee, 0x30, 0xfa, 0x84, 0x34, 0x21, 0xfa, 0xde, 0x1a,
0x24, 0x35, 0xe0, 0x10, 0xa0, 0xb3, 0x4c, 0x5d, 0xcd, 0x57, 0xf4, 0x2b, 0x72, 0xcf, 0x46, 0x85,
0x90, 0xc6, 0x86, 0xe7, 0x89, 0x32, 0x17, 0x32, 0x66, 0x55, 0xd4, 0xe9, 0xd8, 0xa8, 0x18, 0x7d,
0x40, 0x41, 0x51, 0x5e, 0x86, 0x46, 0x5d, 0x4a, 0x11, 0xcb, 0x4b, 0x15, 0x49, 0xc3, 0x6a, 0x4e,
0xd1, 0xc3, 0x87, 0x0e, 0x85, 0xf8, 0x9b, 0x0b, 0x99, 0x24, 0xe2, 0x5d, 0x7e, 0xce, 0xea, 0xa8,
0x12, 0x20, 0xf0, 0xd7, 0xfc, 0x9c, 0x3e, 0x26, 0x04, 0x28, 0x13, 0x49, 0x1e, 0xad, 0x0c, 0x0b,
0x9c, 0x35, 0x80, 0x1c, 0x01, 0x40, 0x9f, 0x90, 0x46, 0x92, 0x2f, 0x45, 0x22, 0x2f, 0x65, 0xc2,
0x1a, 0xe0, 0xea, 0xf7, 0xa5, 0x21, 0x0f, 0x92, 0x7c, 0x79, 0x04, 0x10, 0x7d, 0x44, 0x60, 0xed,
0x58, 0x27, 0x2e, 0xb5, 0x93, 0x7c, 0x89, 0xb4, 0x7f, 0x49, 0xca, 0x85, 0x61, 0xcd, 0x5e, 0x69,
0xaf, 0x39, 0x7c, 0x38, 0xf8, 0xd5, 0xc2, 0xe0, 0xe5, 0xc2, 0xd0, 0x5d, 0xd2, 0xc9, 0x72, 0xab,
0x16, 0xd7, 0xc2, 0x44, 0x5a, 0x15, 0xd6, 0xb0, 0x16, 0x5a, 0xd1, 0x76, 0xe8, 0xdc, 0x81, 0xc0,
0x2a, 0x30, 0xce, 0xda, 0x8e, 0x69, 0x64, 0xff, 0x31, 0x21, 0x45, 0xa8, 0x65, 0x66, 0x85, 0x4a,
0x97, 0xac, 0x83, 0x3b, 0x0d, 0x87, 0x4c, 0xd2, 0x25, 0x38, 0x6e, 0x75, 0x18, 0xad, 0x44, 0x2a,
0x53, 0x76, 0xcf, 0x39, 0x8e, 0xc0, 0xb1, 0x4c, 0xe1, 0x6c, 0xb8, 0xb6, 0xb9, 0x88, 0x65, 0xbc,
0x2e, 0x58, 0xd7, 0x39, 0x0e, 0xc8, 0x21, 0x00, 0x40, 0xd3, 0x4f, 0xb9, 0x5e, 0x6d, 0xf8, 0xdf,
0x46, 0x96, 0x1b, 0x00, 0x39, 0xf6, 0x1f, 0x13, 0x92, 0xa8, 0x6c, 0x25, 0xb4, 0x4c, 0xc3, 0x82,
0x51, 0x77, 0x1c, 0x10, 0x0e, 0x00, 0xdd, 0x25, 0x55, 0x28, 0x4e, 0xc3, 0x3e, 0xee, 0x6d, 0xed,
0x35, 0x87, 0xf7, 0x06, 0x77, 0xeb, 0x95, 0xbb, 0x5d, 0xfa, 0x94, 0xd4, 0xa3, 0x62, 0x2d, 0xa2,
0xb0, 0x60, 0xf7, 0x7b, 0xa5, 0xbd, 0xf6, 0xf7, 0xe4, 0xf9, 0xf0, 0xd5, 0xf3, 0x57, 0xdf, 0xbd,
0x1c, 0xbe, 0x7a, 0xc1, 0x6b, 0x51, 0xb1, 0x3e, 0x08, 0x0b, 0xba, 0x43, 0x9a, 0x8b, 0x5c, 0x47,
0x52, 0x28, 0x0d, 0x6f, 0x3d, 0xc0, 0xb7, 0x08, 0x42, 0x13, 0x40, 0x80, 0x04, 0x79, 0x25, 0x23,
0x11, 0xa5, 0x31, 0x7b, 0xd8, 0xdb, 0x02, 0x12, 0x40, 0x3e, 0x48, 0x21, 0x49, 0xea, 0x58, 0xeb,
0x99, 0x65, 0x9f, 0xa0, 0x25, 0x9d, 0xc1, 0x9d, 0xda, 0xe7, 0x35, 0x79, 0x65, 0x8f, 0x33, 0x0b,
0x2c, 0xa4, 0x61, 0x06, 0xfc, 0xb8, 0xf2, 0x32, 0x8c, 0x39, 0x16, 0x1c, 0x7a, 0xe0, 0x40, 0xba,
0x4b, 0xea, 0xd1, 0x12, 0x4b, 0x8f, 0x3d, 0xc2, 0xfb, 0x5a, 0x83, 0x5b, 0xe5, 0xc8, 0x6b, 0xd1,
0x92, 0x03, 0x31, 0x3b, 0xa4, 0xa9, 0x8d, 0x15, 0x46, 0x9d, 0x27, 0x50, 0x07, 0x9f, 0x3a, 0x93,
0xb5, 0xb1, 0x73, 0x87, 0xd0, 0xfd, 0xdb, 0x65, 0xcf, 0x3e, 0xc3, 0xab, 0x9a, 0x83, 0x0f, 0x10,
0x6f, 0xf8, 0xf5, 0x38, 0xa6, 0x3d, 0xd2, 0x42, 0xa6, 0x36, 0x8e, 0xfc, 0xc6, 0xdd, 0x06, 0xd8,
0xc8, 0x19, 0xbf, 0xe3, 0x6a, 0xca, 0x5c, 0x84, 0x1a, 0x9e, 0x7b, 0xec, 0x14, 0xe4, 0x95, 0x9d,
0x3b, 0x64, 0xa3, 0x90, 0x86, 0xc6, 0x4a, 0x6d, 0xd8, 0x93, 0x1b, 0x85, 0x63, 0x87, 0x40, 0x08,
0xcd, 0x4a, 0x15, 0x78, 0xff, 0x8e, 0x0b, 0x21, 0xc8, 0x70, 0x39, 0xb4, 0xaf, 0x2c, 0x3c, 0x4f,
0xa4, 0x58, 0x18, 0xd6, 0xc3, 0xbd, 0xc0, 0x01, 0x63, 0x43, 0xf7, 0x48, 0xd3, 0x57, 0xb2, 0x50,
0x59, 0xce, 0x3e, 0x47, 0x47, 0x82, 0x81, 0xc7, 0x78, 0x63, 0x8d, 0x45, 0x3d, 0xc9, 0x72, 0xfa,
0x67, 0xf2, 0xf1, 0xdd, 0x00, 0x8b, 0x14, 0x9a, 0x50, 0xbf, 0x57, 0xda, 0xeb, 0x0c, 0xdb, 0x2e,
0x3f, 0xa2, 0x25, 0x82, 0x7c, 0xfb, 0x4e, 0xd0, 0x8f, 0xf3, 0x58, 0xc2, 0x43, 0xcb, 0x8b, 0xdc,
0x58, 0x91, 0xa8, 0x54, 0x59, 0xf6, 0x14, 0xb3, 0xa5, 0xfe, 0xed, 0x37, 0xcf, 0xff, 0xf8, 0xe2,
0xe5, 0x77, 0x9c, 0xe0, 0xde, 0x11, 0x6c, 0xd1, 0x3d, 0xd2, 0xc5, 0x44, 0x11, 0x26, 0x0a, 0x33,
0x01, 0xdd, 0xcf, 0xb0, 0x2f, 0xd0, 0xec, 0x0e, 0xe2, 0xf3, 0x28, 0xcc, 0x66, 0x80, 0xd2, 0x4f,
0x21, 0x6f, 0xac, 0xd4, 0x59, 0x98, 0xb0, 0x5d, 0xef, 0x98, 0x97, 0x31, 0xa7, 0xd2, 0xc2, 0x5e,
0x8b, 0xcc, 0xb0, 0x2f, 0xe1, 0x31, 0x5e, 0x47, 0xf9, 0x04, 0x7c, 0xae, 0xbb, 0x51, 0x60, 0xd8,
0x57, 0x3e, 0xbb, 0xef, 0x8e, 0x06, 0x5e, 0x03, 0xf9, 0xc4, 0xd0, 0xcf, 0x49, 0xcb, 0x67, 0x47,
0xa1, 0xf3, 0xc2, 0xb0, 0xdf, 0x62, 0x85, 0xfa, 0x06, 0x3e, 0x03, 0x88, 0xee, 0x93, 0xed, 0xdb,
0x2a, 0xae, 0x93, 0xec, 0xa3, 0xde, 0xbd, 0x5b, 0x7a, 0xd8, 0x51, 0x9e, 0x93, 0x87, 0x5e, 0x37,
0x5e, 0xa7, 0x85, 0x88, 0xf2, 0xcc, 0xea, 0x3c, 0x49, 0xa4, 0x66, 0x5f, 0xa3, 0xf5, 0xf7, 0xdd,
0xee, 0xe1, 0x3a, 0x2d, 0x0e, 0x6e, 0xf6, 0xa0, 0x2b, 0x2f, 0xb4, 0x94, 0xef, 0x37, 0x81, 0x67,
0xcf, 0xf0, 0xf6, 0x96, 0x03, 0x5d, 0x8c, 0x61, 0x42, 0x5b, 0x95, 0x4a, 0x98, 0x95, 0xbf, 0x73,
0xde, 0x7a, 0x91, 0x7e, 0x4d, 0x28, 0xf4, 0x63, 0xcc, 0x0e, 0x95, 0x89, 0x45, 0xa2, 0x96, 0x17,
0x96, 0x0d, 0x30, 0x83, 0xa0, 0x53, 0xcf, 0x57, 0xaa, 0x98, 0x64, 0x63, 0x84, 0xc1, 0xe1, 0x9f,
0x64, 0xb8, 0x12, 0xe6, 0xda, 0x44, 0x36, 0x31, 0xec, 0xf7, 0xa8, 0xd6, 0x04, 0x6c, 0xee, 0x20,
0x6c, 0x1c, 0xe1, 0xfb, 0x6b, 0xec, 0x85, 0x86, 0x7d, 0xe3, 0x1b, 0x47, 0xf8, 0xfe, 0x7a, 0x06,
0x00, 0x36, 0x6b, 0x1b, 0xda, 0xb5, 0x81, 0xba, 0xf8, 0x16, 0xbb, 0x4e, 0xe0, 0x80, 0x71, 0x0c,
0xc1, 0xca, 0x75, 0x71, 0x01, 0xb4, 0x5a, 0xe3, 0xb3, 0x99, 0x0d, 0x9d, 0x29, 0x6e, 0x63, 0x66,
0x8d, 0x4b, 0xe9, 0xfe, 0x33, 0xff, 0x47, 0xc0, 0x50, 0x69, 0x69, 0x0a, 0xa0, 0x5b, 0x4b, 0x63,
0x73, 0x2d, 0x63, 0x9c, 0x97, 0x01, 0xbf, 0x91, 0xfb, 0xbb, 0x64, 0x1b, 0xb5, 0x3d, 0xe0, 0x0e,
0xf8, 0x09, 0xe7, 0x66, 0x1f, 0x2c, 0xfb, 0x2f, 0x49, 0x13, 0xd5, 0x5c, 0x6b, 0xa6, 0x0f, 0x49,
0xcd, 0xf5, 0x6c, 0x3f, 0x7f, 0xbd, 0xf4, 0xcb, 0xd1, 0xd8, 0xff, 0xc1, 0xfd, 0x95, 0xc4, 0x42,
0x86, 0x76, 0xad, 0x9d, 0x9f, 0xa9, 0x4c, 0x05, 0xb6, 0xe3, 0x8d, 0x35, 0xa9, 0x4c, 0x4f, 0x41,
0xfe, 0x59, 0x8c, 0xca, 0x3f, 0x8b, 0x51, 0xff, 0x9f, 0x25, 0x12, 0x78, 0x6b, 0xff, 0x46, 0xfb,
0xa4, 0x62, 0xaf, 0x0b, 0x37, 0xcd, 0x3b, 0xc3, 0xce, 0x60, 0xb3, 0x21, 0x00, 0xe5, 0xb8, 0x47,
0x9f, 0x90, 0x0a, 0x8c, 0x75, 0xbc, 0xa9, 0x39, 0x24, 0x83, 0x9b, 0x41, 0xcf, 0x11, 0xbf, 0x3d,
0x82, 0xd6, 0x51, 0x04, 0xdf, 0xb4, 0xad, 0x3b, 0x23, 0xc8, 0x81, 0x60, 0xf3, 0x4a, 0xca, 0x42,
0xe4, 0x85, 0xcc, 0xfc, 0xe0, 0x0e, 0x00, 0x98, 0x16, 0x32, 0xa3, 0xfb, 0x24, 0xd8, 0x38, 0x87,
0x03, 0xbb, 0xb9, 0xb1, 0x65, 0x83, 0xf2, 0x9b, 0xfd, 0xfe, 0xbf, 0xcb, 0xfe, 0xb3, 0x81, 0x61,
0xfe, 0x7f, 0x3c, 0x60, 0xa4, 0xbe, 0x31, 0x0d, 0xbe, 0x35, 0x01, 0xdf, 0x88, 0xf4, 0x29, 0xa9,
0x00, 0xc5, 0x68, 0xf1, 0xcd, 0xa0, 0xb9, 0x21, 0x9d, 0xe3, 0x26, 0x7d, 0x46, 0xea, 0x9e, 0x59,
0xb4, 0xbb, 0x39, 0xa4, 0x83, 0x5f, 0xd0, 0xcd, 0x37, 0x2a, 0xf4, 0x0b, 0x52, 0x73, 0x8e, 0x7b,
0x47, 0x5a, 0x83, 0x5b, 0xa4, 0x73, 0xbf, 0xe7, 0xe7, 0x7b, 0xed, 0x7f, 0xce, 0xf7, 0x47, 0x40,
0x96, 0x90, 0x5a, 0x67, 0x39, 0xfe, 0x3e, 0xaa, 0xbc, 0x1e, 0xe9, 0x11, 0x88, 0x77, 0x62, 0x16,
0xfc, 0xf7, 0x98, 0x41, 0xf0, 0xdd, 0x35, 0xa9, 0x59, 0xe2, 0x4f, 0xa4, 0xc1, 0x03, 0xbc, 0x27,
0x35, 0x4b, 0x18, 0x73, 0x97, 0x52, 0x1b, 0x95, 0x67, 0xf8, 0x0b, 0x69, 0x6e, 0x1a, 0xaa, 0x07,
0xf9, 0x66, 0xb7, 0xff, 0xf7, 0x12, 0x69, 0xdd, 0xde, 0x81, 0xdf, 0x60, 0x1a, 0xbe, 0xcb, 0xb5,
0xcf, 0x72, 0x27, 0x20, 0xaa, 0xb2, 0x5c, 0xfb, 0x8f, 0xa7, 0x13, 0x00, 0x5d, 0x2a, 0xeb, 0xbf,
0xe6, 0x0d, 0xee, 0x04, 0x28, 0x2b, 0xb3, 0x3e, 0x77, 0x3f, 0xa4, 0x8a, 0x2f, 0x58, 0x2f, 0xc3,
0x09, 0xfc, 0xe9, 0x62, 0x20, 0xab, 0xdc, 0x09, 0xf0, 0x95, 0x81, 0x5e, 0x89, 0xb1, 0x6b, 0x70,
0x5c, 0xef, 0x0b, 0x6f, 0x97, 0x1f, 0x01, 0x94, 0x90, 0xda, 0xe4, 0xcd, 0xc9, 0x94, 0x8f, 0xba,
0x1f, 0xd1, 0x26, 0xa9, 0x1f, 0xbc, 0x11, 0x27, 0xd3, 0x93, 0x51, 0xb7, 0x44, 0x1b, 0xa4, 0x3a,
0xe3, 0xd3, 0xd9, 0xbc, 0x5b, 0xa6, 0x01, 0xa9, 0xcc, 0xa7, 0xe3, 0xd3, 0xee, 0x16, 0xac, 0xc6,
0x67, 0x47, 0x47, 0xdd, 0x0a, 0x9c, 0x9b, 0x9f, 0xf2, 0xc9, 0xc1, 0x69, 0xb7, 0x0a, 0xe7, 0x0e,
0x47, 0xe3, 0xd7, 0x67, 0x47, 0xa7, 0xdd, 0xda, 0xfe, 0x3f, 0x4a, 0xbe, 0x04, 0x37, 0x99, 0x05,
0x37, 0x8d, 0x8e, 0x67, 0xa7, 0x3f, 0x76, 0x3f, 0x82, 0xf3, 0x87, 0x67, 0xc7, 0xb3, 0x6e, 0x09,
0xce, 0xf0, 0xd1, 0xfc, 0x14, 0x1e, 0x2e, 0x83, 0xc6, 0xc1, 0x5f, 0x46, 0x07, 0x3f, 0x74, 0xb7,
0x68, 0x8b, 0x04, 0x33, 0x3e, 0x12, 0xa8, 0x55, 0xa1, 0xf7, 0x48, 0x73, 0xf6, 0xfa, 0xcd, 0x48,
0xcc, 0x47, 0xfc, 0xed, 0x88, 0x77, 0xab, 0xf0, 0xec, 0xc9, 0xf4, 0x74, 0x32, 0xfe, 0xb1, 0x5b,
0xa3, 0x5d, 0xd2, 0x3a, 0x98, 0x9d, 0x4d, 0x4e, 0xc6, 0x53, 0xa7, 0x5e, 0xa7, 0xdb, 0xa4, 0xbd,
0x41, 0xdc, 0x7d, 0x01, 0x40, 0xe3, 0xd1, 0xeb, 0xd3, 0x33, 0x3e, 0xf2, 0x50, 0x03, 0x9e, 0x7e,
0x3b, 0xe2, 0xf3, 0xc9, 0xf4, 0xa4, 0x4b, 0xfe, 0x13, 0x00, 0x00, 0xff, 0xff, 0x5f, 0x2a, 0xaf,
0x49, 0x5b, 0x0d, 0x00, 0x00,
}

View file

@ -0,0 +1,209 @@
syntax = "proto2";
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
optional int32 pid = 3;
optional int32 fd = 4;
}
message criu_veth_pair {
required string if_in = 1;
required string if_out = 2;
};
message ext_mount_map {
required string key = 1;
required string val = 2;
};
message join_namespace {
required string ns = 1;
required string ns_file = 2;
optional string extra_opt = 3;
}
message inherit_fd {
required string key = 1;
required int32 fd = 2;
};
message cgroup_root {
optional string ctrl = 1;
required string path = 2;
};
message unix_sk {
required uint32 inode = 1;
};
enum criu_cg_mode {
IGNORE = 0;
CG_NONE = 1;
PROPS = 2;
SOFT = 3;
FULL = 4;
STRICT = 5;
DEFAULT = 6;
};
message criu_opts {
required int32 images_dir_fd = 1;
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
optional bool leave_running = 3;
optional bool ext_unix_sk = 4;
optional bool tcp_established = 5;
optional bool evasive_devices = 6;
optional bool shell_job = 7;
optional bool file_locks = 8;
optional int32 log_level = 9 [default = 2];
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
optional criu_page_server_info ps = 11;
optional bool notify_scripts = 12;
optional string root = 13;
optional string parent_img = 14;
optional bool track_mem = 15;
optional bool auto_dedup = 16;
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
optional bool manage_cgroups = 24; /* backward compatibility */
repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
repeated inherit_fd inherit_fd = 27; /* swrk only */
optional bool auto_ext_mnt = 28;
optional bool ext_sharing = 29;
optional bool ext_masters = 30;
repeated string skip_mnt = 31;
repeated string enable_fs = 32;
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
optional criu_cg_mode manage_cgroups_mode = 34;
optional uint32 ghost_limit = 35 [default = 0x100000];
repeated string irmap_scan_paths = 36;
repeated string external = 37;
optional uint32 empty_ns = 38;
repeated join_namespace join_ns = 39;
optional string cgroup_props = 41;
optional string cgroup_props_file = 42;
repeated string cgroup_dump_controller = 43;
optional string freeze_cgroup = 44;
optional uint32 timeout = 45;
optional bool tcp_skip_in_flight = 46;
optional bool weak_sysctls = 47;
optional bool lazy_pages = 48;
optional int32 status_fd = 49;
optional bool orphan_pts_master = 50;
}
message criu_dump_resp {
optional bool restored = 1;
}
message criu_restore_resp {
required int32 pid = 1;
}
message criu_notify {
optional string script = 1;
optional int32 pid = 2;
}
enum criu_req_type {
EMPTY = 0;
DUMP = 1;
RESTORE = 2;
CHECK = 3;
PRE_DUMP = 4;
PAGE_SERVER = 5;
NOTIFY = 6;
CPUINFO_DUMP = 7;
CPUINFO_CHECK = 8;
FEATURE_CHECK = 9;
VERSION = 10;
}
/*
* List of features which can queried via
* CRIU_REQ_TYPE__FEATURE_CHECK
*/
message criu_features {
optional bool mem_track = 1;
optional bool lazy_pages = 2;
}
/*
* Request -- each type corresponds to must-be-there
* request arguments of respective type
*/
message criu_req {
required criu_req_type type = 1;
optional criu_opts opts = 2;
optional bool notify_success = 3;
/*
* When set service won't close the connection but
* will wait for more req-s to appear. Works not
* for all request types.
*/
optional bool keep_open = 4;
/*
* 'features' can be used to query which features
* are supported by the installed criu/kernel
* via RPC.
*/
optional criu_features features = 5;
}
/*
* Response -- it states whether the request was served
* and additional request-specific information
*/
message criu_resp {
required criu_req_type type = 1;
required bool success = 2;
optional criu_dump_resp dump = 3;
optional criu_restore_resp restore = 4;
optional criu_notify notify = 5;
optional criu_page_server_info ps = 6;
optional int32 cr_errno = 7;
optional criu_features features = 8;
optional string cr_errmsg = 9;
optional criu_version version = 10;
}
/* Answer for criu_req_type.VERSION requests */
message criu_version {
required int32 major = 1;
required int32 minor = 2;
optional string gitid = 3;
optional int32 sublevel = 4;
optional int32 extra = 5;
optional string name = 6;
}

View file

@ -0,0 +1,104 @@
package devices
import (
"errors"
"io/ioutil"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
var (
ErrNotADevice = errors.New("not a device node")
)
// Testing dependencies
var (
unixLstat = unix.Lstat
ioutilReadDir = ioutil.ReadDir
)
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the information about a linux device and return that information as a Device struct.
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
var stat unix.Stat_t
err := unixLstat(path, &stat)
if err != nil {
return nil, err
}
var (
devNumber = stat.Rdev
major = unix.Major(devNumber)
)
if major == 0 {
return nil, ErrNotADevice
}
var (
devType rune
mode = stat.Mode
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = 'b'
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = 'c'
}
return &configs.Device{
Type: devType,
Path: path,
Major: int64(major),
Minor: int64(unix.Minor(devNumber)),
Permissions: permissions,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
}, nil
}
func HostDevices() ([]*configs.Device, error) {
return getDevices("/dev")
}
func getDevices(path string) ([]*configs.Device, error) {
files, err := ioutilReadDir(path)
if err != nil {
return nil, err
}
out := []*configs.Device{}
for _, f := range files {
switch {
case f.IsDir():
switch f.Name() {
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
out = append(out, sub...)
continue
}
case f.Name() == "console":
continue
}
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
if err != nil {
if err == ErrNotADevice {
continue
}
if os.IsNotExist(err) {
continue
}
return nil, err
}
out = append(out, device)
}
return out, nil
}

View file

@ -0,0 +1,63 @@
package devices
import (
"errors"
"os"
"testing"
"golang.org/x/sys/unix"
)
func TestDeviceFromPathLstatFailure(t *testing.T) {
testError := errors.New("test error")
// Override unix.Lstat to inject error.
unixLstat = func(path string, stat *unix.Stat_t) error {
return testError
}
_, err := DeviceFromPath("", "")
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}
func TestHostDevicesIoutilReadDirFailure(t *testing.T) {
testError := errors.New("test error")
// Override ioutil.ReadDir to inject error.
ioutilReadDir = func(dirname string) ([]os.FileInfo, error) {
return nil, testError
}
_, err := HostDevices()
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}
func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) {
testError := errors.New("test error")
called := false
// Override ioutil.ReadDir to inject error after the first call.
ioutilReadDir = func(dirname string) ([]os.FileInfo, error) {
if called {
return nil, testError
}
called = true
// Provoke a second call.
fi, err := os.Lstat("/tmp")
if err != nil {
t.Fatalf("Unexpected error %v", err)
}
return []os.FileInfo{fi}, nil
}
_, err := HostDevices()
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}

View file

@ -60,9 +60,9 @@ func (c ErrorCode) String() string {
type Error interface {
error
// Returns a verbose string including the error message
// and a representation of the stack trace suitable for
// printing.
// Returns an error if it failed to write the detail of the Error to w.
// The detail of the Error may include the error message and a
// representation of the stack trace.
Detail(w io.Writer) error
// Returns the error code for this error.

View file

@ -0,0 +1,25 @@
package libcontainer
import "testing"
func TestErrorCode(t *testing.T) {
codes := map[ErrorCode]string{
IdInUse: "Id already in use",
InvalidIdFormat: "Invalid format",
ContainerPaused: "Container paused",
ConfigInvalid: "Invalid configuration",
SystemError: "System error",
ContainerNotExists: "Container does not exist",
ContainerNotStopped: "Container is not stopped",
ContainerNotRunning: "Container is not running",
ConsoleExists: "Console exists for process",
ContainerNotPaused: "Container is not paused",
NoProcessOps: "No process operations",
}
for code, expected := range codes {
if actual := code.String(); actual != expected {
t.Fatalf("expected string %q but received %q", expected, actual)
}
}
}

View file

@ -10,7 +10,7 @@ type Factory interface {
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and file system) must have distinct ids.
// a factory with the same path (and filesystem) must have distinct ids.
//
// Returns the new container with a running process.
//
@ -28,7 +28,6 @@ type Factory interface {
//
// errors:
// Path does not exist
// Container is stopped
// System error
Load(id string) (Container, error)

View file

@ -6,58 +6,42 @@ import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime/debug"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
const (
stateFilename = "state.json"
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
)
var (
idRegex = regexp.MustCompile(`^[\w-\.]+$`)
maxIdLen = 1024
)
var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init arguments.
// provided init binary path and arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
name := args[0]
if filepath.Base(name) == name {
if lp, err := exec.LookPath(name); err == nil {
name = lp
return func(l *LinuxFactory) (err error) {
if len(args) > 0 {
// Resolve relative paths to ensure that its available
// after directory changes.
if args[0], err = filepath.Abs(args[0]); err != nil {
return newGenericError(err, ConfigInvalid)
}
} else {
abs, err := filepath.Abs(name)
if err != nil {
return err
}
name = abs
}
l.InitPath = "/proc/self/exe"
l.InitArgs = append([]string{name}, args[1:]...)
return nil
}
}
// InitPath returns an options func to configure a LinuxFactory with the
// provided absolute path to the init binary and arguements.
func InitPath(path string, args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.InitPath = path
l.InitArgs = args
return nil
}
@ -88,6 +72,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}
// IntelRdtfs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
func IntelRdtFs(l *LinuxFactory) error {
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
return &intelrdt.IntelRdtManager{
Config: config,
Id: id,
Path: path,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
@ -95,13 +93,22 @@ func TmpfsRoot(l *LinuxFactory) error {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// CriuPath returns an option func to configure a LinuxFactory with the
// provided criupath
func CriuPath(criupath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.CriuPath = criupath
return nil
}
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
@ -112,12 +119,16 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
}
l := &LinuxFactory{
Root: root,
InitPath: "/proc/self/exe",
InitArgs: []string{os.Args[0], "init"},
Validator: validate.New(),
CriuPath: "criu",
}
InitArgs(os.Args[0], "init")(l)
Cgroupfs(l)
for _, opt := range options {
if opt == nil {
continue
}
if err := opt(l); err != nil {
return nil, err
}
@ -130,7 +141,8 @@ type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitPath is the absolute path to the init binary.
// InitPath is the path for calling the init responsibilities for spawning
// a container.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning
@ -141,11 +153,19 @@ type LinuxFactory struct {
// containers.
CriuPath string
// New{u,g}uidmapPath is the path to the binaries used for mapping with
// rootless containers.
NewuidmapPath string
NewgidmapPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@ -164,7 +184,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0700); err != nil {
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError)
}
c := &linuxContainer{
@ -174,8 +197,13 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
return c, nil
}
@ -185,7 +213,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot)
state, err := l.loadState(containerRoot, id)
if err != nil {
return nil, err
}
@ -195,20 +223,26 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
fds: state.ExternalDescriptors,
}
c := &linuxContainer{
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &createdState{c: c, s: Created}
c.state = &loadedState{c: c}
if err := c.refreshState(); err != nil {
return nil, err
}
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
}
return c, nil
}
@ -219,55 +253,79 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
pipefd, err := strconv.Atoi(fdStr)
var (
pipefd, fifofd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
// Get the INITPIPE.
pipefd, err = strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
defer pipe.Close()
// Only init processes have FIFOFD.
fifofd = -1
if it == initStandard {
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
}
}
if envConsole != "" {
console, err := strconv.Atoi(envConsole)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
}
consoleSocket = os.NewFile(uintptr(console), "console-socket")
defer consoleSocket.Close()
}
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
var i initer
defer func() {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
// If container's init successed, syscall.Exec will not return, hence
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
panic(err)
}
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
// ensure that this pipe is always closed
pipe.Close()
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err = newContainerInit(it, pipe)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
if err != nil {
return err
}
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
return i.Init()
}
func (l *LinuxFactory) loadState(root string) (*State, error) {
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(err, ContainerNotExists)
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
@ -283,8 +341,24 @@ func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
return nil
}
// NewuidmapPath returns an option func to configure a LinuxFactory with the
// provided ..
func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewuidmapPath = newuidmapPath
return nil
}
}
// NewgidmapPath returns an option func to configure a LinuxFactory with the
// provided ..
func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewgidmapPath = newgidmapPath
return nil
}
}

View file

@ -0,0 +1,234 @@
// +build linux
package libcontainer
import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
return dir, nil
}
func TestFactoryNew(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewIntelRdt(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewTmpfs(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, TmpfsRoot)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
mounted, err := mount.Mounted(lfactory.Root)
if err != nil {
t.Fatal(err)
}
if !mounted {
t.Fatalf("Factory Root is not mounted")
}
mounts, err := mount.GetMounts()
if err != nil {
t.Fatal(err)
}
var found bool
for _, m := range mounts {
if m.Mountpoint == lfactory.Root {
if m.Fstype != "tmpfs" {
t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs")
}
if m.Source != "tmpfs" {
t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs")
}
found = true
}
}
if !found {
t.Fatalf("Factory Root is not listed in mounts list")
}
defer unix.Unmount(root, unix.MNT_DETACH)
}
func TestFactoryLoadNotExists(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
_, err = factory.Load("nocontainer")
if err == nil {
t.Fatal("expected nil error loading non-existing container")
}
lerr, ok := err.(Error)
if !ok {
t.Fatal("expected libcontainer error type")
}
if lerr.Code() != ContainerNotExists {
t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code())
}
}
func TestFactoryLoadContainer(t *testing.T) {
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
// setup default container config and state for mocking
var (
id = "1"
expectedHooks = &configs.Hooks{
Prestart: []configs.Hook{
configs.CommandHook{Command: configs.Command{Path: "prestart-hook"}},
},
Poststart: []configs.Hook{
configs.CommandHook{Command: configs.Command{Path: "poststart-hook"}},
},
Poststop: []configs.Hook{
unserializableHook{},
configs.CommandHook{Command: configs.Command{Path: "poststop-hook"}},
},
}
expectedConfig = &configs.Config{
Rootfs: "/mycontainer/root",
Hooks: expectedHooks,
}
expectedState = &State{
BaseState: BaseState{
InitProcessPid: 1024,
Config: *expectedConfig,
},
}
)
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
t.Fatal(err)
}
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Load(id)
if err != nil {
t.Fatal(err)
}
if container.ID() != id {
t.Fatalf("expected container id %q but received %q", id, container.ID())
}
config := container.Config()
if config.Rootfs != expectedConfig.Rootfs {
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
}
expectedHooks.Poststop = expectedHooks.Poststop[1:] // expect unserializable hook to be skipped
if !reflect.DeepEqual(config.Hooks, expectedHooks) {
t.Fatalf("expects hooks %q but received %q", expectedHooks, config.Hooks)
}
lcontainer, ok := container.(*linuxContainer)
if !ok {
t.Fatal("expected linux container on linux based systems")
}
if lcontainer.initProcess.pid() != expectedState.InitProcessPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid())
}
}
func marshal(path string, v interface{}) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return utils.WriteJSON(f, v)
}
type unserializableHook struct{}
func (unserializableHook) Run(configs.HookState) error {
return nil
}

View file

@ -1,6 +1,7 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
@ -8,20 +9,6 @@ import (
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
type syncType uint8
const (
procReady syncType = iota
procError
procRun
procHooks
procResume
)
type syncT struct {
Type syncType `json:"type"`
}
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
@ -51,14 +38,27 @@ func newGenericError(err error, c ErrorCode) Error {
}
func newSystemError(err error) Error {
if le, ok := err.(Error); ok {
return le
}
return createSystemError(err, "")
}
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
return createSystemError(err, fmt.Sprintf(cause, v...))
}
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Stack: stacktrace.Capture(1),
Cause: cause,
Stack: stacktrace.Capture(2),
}
if err != nil {
gerr.Message = err.Error()
@ -70,12 +70,17 @@ type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return e.Message
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
}
func (e *genericError) Code() ErrorCode {

View file

@ -0,0 +1,49 @@
package libcontainer
import (
"fmt"
"io/ioutil"
"testing"
)
func TestErrorDetail(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if derr := err.Detail(ioutil.Discard); derr != nil {
t.Fatal(derr)
}
}
func TestErrorWithCode(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if code := err.Code(); code != SystemError {
t.Fatalf("expected err code %q but %q", SystemError, code)
}
}
func TestErrorWithError(t *testing.T) {
cc := []struct {
errmsg string
cause string
}{
{
errmsg: "test error",
},
{
errmsg: "test error",
cause: "test",
},
}
for _, v := range cc {
err := newSystemErrorWithCause(fmt.Errorf(v.errmsg), v.cause)
msg := err.Error()
if v.cause == "" && msg != v.errmsg {
t.Fatalf("expected err(%q) equal errmsg(%q)", msg, v.errmsg)
}
if v.cause != "" && msg == v.errmsg {
t.Fatalf("unexpected err(%q) equal errmsg(%q)", msg, v.errmsg)
}
}
}

View file

@ -6,19 +6,21 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
"syscall"
"syscall" // only for Errno
"unsafe"
"github.com/Sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
@ -30,7 +32,8 @@ const (
)
type pid struct {
Pid int `json:"pid"`
Pid int `json:"pid"`
PidFirstChild int `json:"pid_first"`
}
// network is an internal struct used to setup container networks.
@ -44,27 +47,31 @@ type network struct {
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities *configs.Capabilities `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
ConsoleWidth uint16 `json:"console_width"`
ConsoleHeight uint16 `json:"console_height"`
Rootless bool `json:"rootless"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
@ -75,13 +82,17 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
switch t {
case initSetns:
return &linuxSetnsInit{
config: config,
pipe: pipe,
consoleSocket: consoleSocket,
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
pipe: pipe,
consoleSocket: consoleSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
@ -113,16 +124,18 @@ func finalizeNamespace(config *initConfig) error {
return err
}
capabilities := config.Config.Capabilities
capabilities := &configs.Capabilities{}
if config.Capabilities != nil {
capabilities = config.Capabilities
} else if config.Config.Capabilities != nil {
capabilities = config.Config.Capabilities
}
w, err := newCapWhitelist(capabilities)
w, err := newContainerCapList(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
if err := w.ApplyBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
@ -135,36 +148,79 @@ func finalizeNamespace(config *initConfig) error {
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
if err := w.ApplyCaps(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return err
if err := unix.Chdir(config.Cwd); err != nil {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
}
}
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
defer socket.Close()
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but since the /dev/pts mount
// can have gid=X set (at the users' option). So touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
pty, slavePath, err := console.NewPty()
if err != nil {
return err
}
if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
err = pty.Resize(console.WinSize{
Height: config.ConsoleHeight,
Width: config.ConsoleWidth,
})
if err != nil {
return err
}
}
// After we return from here, we don't need the console anymore.
defer pty.Close()
// Mount the console inside our rootfs.
if mount {
if err := mountConsole(slavePath); err != nil {
return err
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
return err
}
// Now, dup over all the things.
return dupStdio(slavePath)
}
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procRun {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
@ -173,19 +229,15 @@ func syncParentReady(pipe io.ReadWriter) error {
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procResume {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
@ -193,40 +245,69 @@ func syncParentHooks(pipe io.ReadWriter) error {
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
/* var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}*/
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
// Rather than just erroring out later in setuid(2) and setgid(2), check
// that the user is mapped here.
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
return fmt.Errorf("cannot set uid to unmapped user in user namespace")
}
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
}
if config.Rootless {
// We cannot set any additional groups in a rootless container and thus
// we bail if the user asked us to do so. TODO: We currently can't do
// this check earlier, but if libcontainer.Process.User was typesafe
// this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// Before we change to the container's user make sure that the processes
// STDIO is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(config, execUser); err != nil {
return err
}
/*
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}*/
if err := unix.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
@ -234,6 +315,7 @@ func setupUser(config *initConfig) error {
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
@ -246,9 +328,9 @@ func setupUser(config *initConfig) error {
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil {
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
@ -256,15 +338,32 @@ func fixStdioPermissions(u *user.ExecUser) error {
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s syscall.Stat_t
if err := syscall.Fstat(int(fd), &s); err != nil {
var s unix.Stat_t
if err := unix.Fstat(int(fd), &s); err != nil {
return err
}
// skip chown of /dev/null if it was used as one of the STDIO fds.
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
// If we've hit an EINVAL then s.Gid isn't mapped in the user
// namespace. If we've hit an EPERM then the inode's current owner
// is not mapped in our user namespace (in particular,
// privileged_wrt_inode_uidgid() has failed). In either case, we
// are in a configuration where it's better for us to just not
// touch the stdio rather than bail at this point.
if err == unix.EINVAL || err == unix.EPERM {
continue
}
return err
}
}
@ -319,23 +418,58 @@ func setupRoute(config *configs.Config) error {
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
func setOomScoreAdj(oomScoreAdj int, pid int) error {
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
const _P_PID = 1
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}
// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, os.NewSyscallError("waitid", e)
}
return si.si_pid != 0, nil
}
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
func isNoChildren(err error) bool {
switch err := err.(type) {
case syscall.Errno:
if err == unix.ECHILD {
return true
}
case *os.SyscallError:
if err.Err == unix.ECHILD {
return true
}
}
return false
}
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
@ -352,16 +486,48 @@ func killCgroupProcesses(m cgroups.Manager) error {
continue
}
procs = append(procs, p)
if err := p.Kill(); err != nil {
if err := p.Signal(s); err != nil {
logrus.Warn(err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
subreaper, err := system.GetSubreaper()
if err != nil {
// The error here means that PR_GET_CHILD_SUBREAPER is not
// supported because this code might run on a kernel older
// than 3.4. We don't want to throw an error in that case,
// and we simplify things, considering there is no subreaper
// set.
subreaper = 0
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !isNoChildren(err) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}
// In case a subreaper has been setup, this code must not
// wait for the process. Otherwise, we cannot be sure the
// current process will be reaped by the subreaper, while
// the subreaper might be waiting for this process in order
// to retrieve its exit code.
if subreaper == 0 {
if _, err := p.Wait(); err != nil {
if !isNoChildren(err) {
logrus.Warn("wait: ", err)
}
}
}
}
return nil

View file

@ -0,0 +1,255 @@
package integration
import (
"bufio"
"bytes"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
func showFile(t *testing.T, fname string) error {
t.Logf("=== %s ===\n", fname)
f, err := os.Open(fname)
if err != nil {
t.Log(err)
return err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
t.Log(scanner.Text())
}
if err := scanner.Err(); err != nil {
return err
}
t.Logf("=== END ===\n")
return nil
}
func TestUsernsCheckpoint(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
cmd := exec.Command("criu", "check", "--feature", "userns")
if err := cmd.Run(); err != nil {
t.Skip("Unable to c/r a container with userns")
}
testCheckpoint(t, true)
}
func TestCheckpoint(t *testing.T) {
testCheckpoint(t, false)
}
func testCheckpoint(t *testing.T, userns bool) {
if testing.Short() {
return
}
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Mounts = append(config.Mounts, &configs.Mount{
Destination: "/sys/fs/cgroup",
Device: "cgroup",
Flags: defaultMountFlags | unix.MS_RDONLY,
})
if userns {
config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
}
factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Create("test", config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
var stdout bytes.Buffer
pconfig := libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
Stdout: &stdout,
}
err = container.Run(&pconfig)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
pid, err := pconfig.Pid()
if err != nil {
t.Fatal(err)
}
process, err := os.FindProcess(pid)
if err != nil {
t.Fatal(err)
}
parentDir, err := ioutil.TempDir("", "criu-parent")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(parentDir)
preDumpOpts := &libcontainer.CriuOpts{
ImagesDirectory: parentDir,
WorkDirectory: parentDir,
PreDump: true,
}
preDumpLog := filepath.Join(preDumpOpts.WorkDirectory, "dump.log")
if err := container.Checkpoint(preDumpOpts); err != nil {
showFile(t, preDumpLog)
t.Fatal(err)
}
state, err := container.Status()
if err != nil {
t.Fatal(err)
}
if state != libcontainer.Running {
t.Fatal("Unexpected preDump state: ", state)
}
imagesDir, err := ioutil.TempDir("", "criu")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(imagesDir)
checkpointOpts := &libcontainer.CriuOpts{
ImagesDirectory: imagesDir,
WorkDirectory: imagesDir,
ParentImage: "../criu-parent",
}
dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log")
restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log")
if err := container.Checkpoint(checkpointOpts); err != nil {
showFile(t, dumpLog)
t.Fatal(err)
}
state, err = container.Status()
if err != nil {
t.Fatal(err)
}
if state != libcontainer.Stopped {
t.Fatal("Unexpected state checkpoint: ", state)
}
stdinW.Close()
_, err = process.Wait()
if err != nil {
t.Fatal(err)
}
// reload the container
container, err = factory.Load("test")
if err != nil {
t.Fatal(err)
}
restoreStdinR, restoreStdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
restoreProcessConfig := &libcontainer.Process{
Cwd: "/",
Stdin: restoreStdinR,
Stdout: &stdout,
}
err = container.Restore(restoreProcessConfig, checkpointOpts)
restoreStdinR.Close()
defer restoreStdinW.Close()
if err != nil {
showFile(t, restoreLog)
t.Fatal(err)
}
state, err = container.Status()
if err != nil {
t.Fatal(err)
}
if state != libcontainer.Running {
t.Fatal("Unexpected restore state: ", state)
}
pid, err = restoreProcessConfig.Pid()
if err != nil {
t.Fatal(err)
}
process, err = os.FindProcess(pid)
if err != nil {
t.Fatal(err)
}
_, err = restoreStdinW.WriteString("Hello!")
if err != nil {
t.Fatal(err)
}
restoreStdinW.Close()
s, err := process.Wait()
if err != nil {
t.Fatal(err)
}
if !s.Success() {
t.Fatal(s.String(), pid)
}
output := string(stdout.Bytes())
if !strings.Contains(output, "Hello!") {
t.Fatal("Did not restore the pipe correctly:", output)
}
}

View file

@ -0,0 +1,2 @@
// integration is used for integration testing of libcontainer
package integration

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,597 @@
package integration
import (
"bytes"
"fmt"
"io"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
func TestExecIn(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
buffers := newStdBuffers()
ps := &libcontainer.Process{
Cwd: "/",
Args: []string{"ps"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(ps)
ok(t, err)
waitProcess(ps, t)
stdinW.Close()
waitProcess(process, t)
out := buffers.Stdout.String()
if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") {
t.Fatalf("unexpected running process, output %q", out)
}
if strings.Contains(out, "\r") {
t.Fatalf("unexpected carriage-return in output")
}
}
func TestExecInUsernsRlimit(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
testExecInRlimit(t, true)
}
func TestExecInRlimit(t *testing.T) {
testExecInRlimit(t, false)
}
func testExecInRlimit(t *testing.T, userns bool) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if userns {
config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
}
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
buffers := newStdBuffers()
ps := &libcontainer.Process{
Cwd: "/",
Args: []string{"/bin/sh", "-c", "ulimit -n"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
Rlimits: []configs.Rlimit{
// increase process rlimit higher than container rlimit to test per-process limit
{Type: unix.RLIMIT_NOFILE, Hard: 1026, Soft: 1026},
},
}
err = container.Run(ps)
ok(t, err)
waitProcess(ps, t)
stdinW.Close()
waitProcess(process, t)
out := buffers.Stdout.String()
if limit := strings.TrimSpace(out); limit != "1026" {
t.Fatalf("expected rlimit to be 1026, got %s", limit)
}
}
func TestExecInAdditionalGroups(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
var stdout bytes.Buffer
pconfig := libcontainer.Process{
Cwd: "/",
Args: []string{"sh", "-c", "id", "-Gn"},
Env: standardEnvironment,
Stdin: nil,
Stdout: &stdout,
AdditionalGroups: []string{"plugdev", "audio"},
}
err = container.Run(&pconfig)
ok(t, err)
// Wait for process
waitProcess(&pconfig, t)
stdinW.Close()
waitProcess(process, t)
outputGroups := string(stdout.Bytes())
// Check that the groups output has the groups that we specified
if !strings.Contains(outputGroups, "audio") {
t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
}
if !strings.Contains(outputGroups, "plugdev") {
t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
}
}
func TestExecInError(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer func() {
stdinW.Close()
if _, err := process.Wait(); err != nil {
t.Log(err)
}
}()
ok(t, err)
for i := 0; i < 42; i++ {
var out bytes.Buffer
unexistent := &libcontainer.Process{
Cwd: "/",
Args: []string{"unexistent"},
Env: standardEnvironment,
Stderr: &out,
}
err = container.Run(unexistent)
if err == nil {
t.Fatal("Should be an error")
}
if !strings.Contains(err.Error(), "executable file not found") {
t.Fatalf("Should be error about not found executable, got %s", err)
}
if !bytes.Contains(out.Bytes(), []byte("executable file not found")) {
t.Fatalf("executable file not found error not delivered to stdio:\n%s", out.String())
}
}
}
func TestExecInTTY(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
var stdout bytes.Buffer
ps := &libcontainer.Process{
Cwd: "/",
Args: []string{"ps"},
Env: standardEnvironment,
}
parent, child, err := utils.NewSockPair("console")
if err != nil {
ok(t, err)
}
defer parent.Close()
defer child.Close()
ps.ConsoleSocket = child
type cdata struct {
c console.Console
err error
}
dc := make(chan *cdata, 1)
go func() {
f, err := utils.RecvFd(parent)
if err != nil {
dc <- &cdata{
err: err,
}
return
}
c, err := console.ConsoleFromFile(f)
if err != nil {
dc <- &cdata{
err: err,
}
return
}
console.ClearONLCR(c.Fd())
dc <- &cdata{
c: c,
}
}()
err = container.Run(ps)
ok(t, err)
data := <-dc
if data.err != nil {
ok(t, data.err)
}
console := data.c
copy := make(chan struct{})
go func() {
io.Copy(&stdout, console)
close(copy)
}()
ok(t, err)
select {
case <-time.After(5 * time.Second):
t.Fatal("Waiting for copy timed out")
case <-copy:
}
waitProcess(ps, t)
stdinW.Close()
waitProcess(process, t)
out := stdout.String()
if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") {
t.Fatalf("unexpected running process, output %q", out)
}
if strings.Contains(out, "\r") {
t.Fatalf("unexpected carriage-return in output")
}
}
func TestExecInEnvironment(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
buffers := newStdBuffers()
process2 := &libcontainer.Process{
Cwd: "/",
Args: []string{"env"},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"DEBUG=true",
"DEBUG=false",
"ENV=test",
},
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(process2)
ok(t, err)
waitProcess(process2, t)
stdinW.Close()
waitProcess(process, t)
out := buffers.Stdout.String()
// check execin's process environment
if !strings.Contains(out, "DEBUG=false") ||
!strings.Contains(out, "ENV=test") ||
!strings.Contains(out, "HOME=/root") ||
!strings.Contains(out, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") ||
strings.Contains(out, "DEBUG=true") {
t.Fatalf("unexpected running process, output %q", out)
}
}
func TestExecinPassExtraFiles(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
var stdout bytes.Buffer
pipeout1, pipein1, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
pipeout2, pipein2, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
inprocess := &libcontainer.Process{
Cwd: "/",
Args: []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"},
Env: []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"},
ExtraFiles: []*os.File{pipein1, pipein2},
Stdin: nil,
Stdout: &stdout,
}
err = container.Run(inprocess)
if err != nil {
t.Fatal(err)
}
waitProcess(inprocess, t)
stdinW.Close()
waitProcess(process, t)
out := string(stdout.Bytes())
// fd 5 is the directory handle for /proc/$$/fd
if out != "0 1 2 3 4 5" {
t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to exec, got '%s'", out)
}
var buf = []byte{0}
_, err = pipeout1.Read(buf)
if err != nil {
t.Fatal(err)
}
out1 := string(buf)
if out1 != "1" {
t.Fatalf("expected first pipe to receive '1', got '%s'", out1)
}
_, err = pipeout2.Read(buf)
if err != nil {
t.Fatal(err)
}
out2 := string(buf)
if out2 != "2" {
t.Fatalf("expected second pipe to receive '2', got '%s'", out2)
}
}
func TestExecInOomScoreAdj(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.OomScoreAdj = 200
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
buffers := newStdBuffers()
ps := &libcontainer.Process{
Cwd: "/",
Args: []string{"/bin/sh", "-c", "cat /proc/self/oom_score_adj"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(ps)
ok(t, err)
waitProcess(ps, t)
stdinW.Close()
waitProcess(process, t)
out := buffers.Stdout.String()
if oomScoreAdj := strings.TrimSpace(out); oomScoreAdj != strconv.Itoa(config.OomScoreAdj) {
t.Fatalf("expected oomScoreAdj to be %d, got %s", config.OomScoreAdj, oomScoreAdj)
}
}
func TestExecInUserns(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)
process := &libcontainer.Process{
Cwd: "/",
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Run(process)
stdinR.Close()
defer stdinW.Close()
ok(t, err)
initPID, err := process.Pid()
ok(t, err)
initUserns, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/user", initPID))
ok(t, err)
buffers := newStdBuffers()
process2 := &libcontainer.Process{
Cwd: "/",
Args: []string{"readlink", "/proc/self/ns/user"},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
},
Stdout: buffers.Stdout,
Stderr: os.Stderr,
}
err = container.Run(process2)
ok(t, err)
waitProcess(process2, t)
stdinW.Close()
waitProcess(process, t)
if out := strings.TrimSpace(buffers.Stdout.String()); out != initUserns {
t.Errorf("execin userns(%s), wanted %s", out, initUserns)
}
}

View file

@ -0,0 +1,61 @@
package integration
import (
"os"
"runtime"
"testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/sirupsen/logrus"
)
// init runs the libcontainer initialization code because of the busybox style needs
// to work around the go runtime and the issues with forking
func init() {
if len(os.Args) < 2 || os.Args[1] != "init" {
return
}
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, err := libcontainer.New("")
if err != nil {
logrus.Fatalf("unable to initialize for container: %s", err)
}
if err := factory.StartInitialization(); err != nil {
logrus.Fatal(err)
}
}
var (
factory libcontainer.Factory
systemdFactory libcontainer.Factory
)
func TestMain(m *testing.M) {
var (
err error
ret int
)
logrus.SetOutput(os.Stderr)
logrus.SetLevel(logrus.InfoLevel)
factory, err = libcontainer.New("/run/libctTests", libcontainer.Cgroupfs)
if err != nil {
logrus.Error(err)
os.Exit(1)
}
if systemd.UseSystemd() {
systemdFactory, err = libcontainer.New("/run/libctTests", libcontainer.SystemdCgroups)
if err != nil {
logrus.Error(err)
os.Exit(1)
}
}
ret = m.Run()
os.Exit(ret)
}

View file

@ -0,0 +1,419 @@
// +build linux,cgo,seccomp
package integration
import (
"strings"
"syscall"
"testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
)
func TestSeccompDenyGetcwd(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Action: configs.Errno,
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(pwd)
if err != nil {
t.Fatal(err)
}
ps, err := pwd.Wait()
if err == nil {
t.Fatal("Expecting error (negative return code); instead exited cleanly!")
}
var exitCode int
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
t.Fatalf("Unrecognized exit reason!")
}
if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
}
expected := "pwd: getcwd: Operation not permitted"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}
func TestSeccompPermitWriteConditional(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
},
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
dmesg := &libcontainer.Process{
Cwd: "/",
Args: []string{"busybox", "ls", "/"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(dmesg)
if err != nil {
t.Fatal(err)
}
if _, err := dmesg.Wait(); err != nil {
t.Fatalf("%s: %s", err, buffers.Stderr)
}
}
func TestSeccompDenyWriteConditional(t *testing.T) {
if testing.Short() {
return
}
// Only test if library version is v2.2.1 or higher
// Conditional filtering will always error in v2.2.0 and lower
major, minor, micro := libseccomp.GetLibraryVersion()
if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
},
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
dmesg := &libcontainer.Process{
Cwd: "/",
Args: []string{"busybox", "ls", "does_not_exist"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(dmesg)
if err != nil {
t.Fatal(err)
}
ps, err := dmesg.Wait()
if err == nil {
t.Fatal("Expecting negative return, instead got 0!")
}
var exitCode int
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
t.Fatalf("Unrecognized exit reason!")
}
if exitCode == 0 {
t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode)
}
// We're denying write to stderr, so we expect an empty buffer
expected := ""
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}
func TestSeccompPermitWriteMultipleConditions(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
{
Index: 2,
Value: 0,
Op: configs.NotEqualTo,
},
},
},
},
}
buffers, exitCode, err := runContainer(config, "", "ls", "/")
if err != nil {
t.Fatalf("%s: %s", buffers, err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d buffers %s", exitCode, buffers)
}
// We don't need to verify the actual thing printed
// Just that something was written to stdout
if len(buffers.Stdout.String()) == 0 {
t.Fatalf("Nothing was written to stdout, write call failed!\n")
}
}
func TestSeccompDenyWriteMultipleConditions(t *testing.T) {
if testing.Short() {
return
}
// Only test if library version is v2.2.1 or higher
// Conditional filtering will always error in v2.2.0 and lower
major, minor, micro := libseccomp.GetLibraryVersion()
if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
{
Index: 2,
Value: 0,
Op: configs.NotEqualTo,
},
},
},
},
}
buffers, exitCode, err := runContainer(config, "", "ls", "/does_not_exist")
if err == nil {
t.Fatalf("Expecting error return, instead got 0")
}
if exitCode == 0 {
t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode)
}
expected := ""
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}
func TestSeccompMultipleConditionSameArgDeniesStdout(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
// Prevent writing to both stdout and stderr
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 1,
Op: configs.EqualTo,
},
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
},
},
},
}
buffers, exitCode, err := runContainer(config, "", "ls", "/")
if err != nil {
t.Fatalf("%s: %s", buffers, err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d buffers %s", exitCode, buffers)
}
// Verify that nothing was printed
if len(buffers.Stdout.String()) != 0 {
t.Fatalf("Something was written to stdout, write call succeeded!\n")
}
}
func TestSeccompMultipleConditionSameArgDeniesStderr(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
// Prevent writing to both stdout and stderr
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 1,
Op: configs.EqualTo,
},
{
Index: 0,
Value: 2,
Op: configs.EqualTo,
},
},
},
},
}
buffers, exitCode, err := runContainer(config, "", "ls", "/does_not_exist")
if err == nil {
t.Fatalf("Expecting error return, instead got 0")
}
if exitCode == 0 {
t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode)
}
// Verify nothing was printed
if len(buffers.Stderr.String()) != 0 {
t.Fatalf("Something was written to stderr, write call succeeded!\n")
}
}

View file

@ -0,0 +1,191 @@
package integration
import (
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
var standardEnvironment = []string{
"HOME=/root",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=integration",
"TERM=xterm",
}
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
// newTemplateConfig returns a base template for running a container
//
// it uses a network strategy of just setting a loopback interface
// and the default setup for devices
func newTemplateConfig(rootfs string) *configs.Config {
allowAllDevices := false
return &configs.Config{
Rootfs: rootfs,
Capabilities: &configs.Capabilities{
Bounding: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Permitted: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Inheritable: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Ambient: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Effective: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Path: "integration/test",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: &allowAllDevices,
AllowedDevices: configs.DefaultAllowedDevices,
},
},
MaskPaths: []string{
"/proc/kcore",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "integration",
Mounts: []*configs.Mount{
{
Source: "proc",
Destination: "/proc",
Device: "proc",
Flags: defaultMountFlags,
},
{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
/*
CI is broken on the debian based kernels with this
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
*/
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | unix.MS_RDONLY,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: unix.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},
},
}
}

View file

@ -0,0 +1,170 @@
package integration
import (
"bytes"
"crypto/md5"
"encoding/hex"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"syscall"
"testing"
"time"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
)
func newStdBuffers() *stdBuffers {
return &stdBuffers{
Stdin: bytes.NewBuffer(nil),
Stdout: bytes.NewBuffer(nil),
Stderr: bytes.NewBuffer(nil),
}
}
type stdBuffers struct {
Stdin *bytes.Buffer
Stdout *bytes.Buffer
Stderr *bytes.Buffer
}
func (b *stdBuffers) String() string {
s := []string{}
if b.Stderr != nil {
s = append(s, b.Stderr.String())
}
if b.Stdout != nil {
s = append(s, b.Stdout.String())
}
return strings.Join(s, "|")
}
// ok fails the test if an err is not nil.
func ok(t testing.TB, err error) {
if err != nil {
_, file, line, _ := runtime.Caller(1)
t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error())
}
}
func waitProcess(p *libcontainer.Process, t *testing.T) {
_, file, line, _ := runtime.Caller(1)
status, err := p.Wait()
if err != nil {
t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error())
}
if !status.Success() {
t.Fatalf("%s:%d: unexpected status: %s\n\n", filepath.Base(file), line, status.String())
}
}
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
if err := os.MkdirAll(dir, 0700); err != nil {
return "", err
}
return dir, nil
}
func newTestBundle() (string, error) {
dir, err := ioutil.TempDir("", "bundle")
if err != nil {
return "", err
}
if err := os.MkdirAll(dir, 0700); err != nil {
return "", err
}
return dir, nil
}
// newRootfs creates a new tmp directory and copies the busybox root filesystem
func newRootfs() (string, error) {
dir, err := ioutil.TempDir("", "")
if err != nil {
return "", err
}
if err := os.MkdirAll(dir, 0700); err != nil {
return "", err
}
if err := copyBusybox(dir); err != nil {
return "", err
}
return dir, nil
}
func remove(dir string) {
os.RemoveAll(dir)
}
// copyBusybox copies the rootfs for a busybox container created for the test image
// into the new directory for the specific test
func copyBusybox(dest string) error {
out, err := exec.Command("sh", "-c", fmt.Sprintf("cp -a /busybox/* %s/", dest)).CombinedOutput()
if err != nil {
return fmt.Errorf("copy error %q: %q", err, out)
}
return nil
}
func newContainer(config *configs.Config) (libcontainer.Container, error) {
h := md5.New()
h.Write([]byte(time.Now().String()))
return newContainerWithName(hex.EncodeToString(h.Sum(nil)), config)
}
func newContainerWithName(name string, config *configs.Config) (libcontainer.Container, error) {
f := factory
if config.Cgroups != nil && config.Cgroups.Parent == "system.slice" {
f = systemdFactory
}
return f.Create(name, config)
}
// runContainer runs the container with the specific config and arguments
//
// buffers are returned containing the STDOUT and STDERR output for the run
// along with the exit code and any go error
func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) {
container, err := newContainer(config)
if err != nil {
return nil, -1, err
}
defer container.Destroy()
buffers = newStdBuffers()
process := &libcontainer.Process{
Cwd: "/",
Args: args,
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Run(process)
if err != nil {
return buffers, -1, err
}
ps, err := process.Wait()
if err != nil {
return buffers, -1, err
}
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
return buffers, -1, err
}
return
}

View file

@ -0,0 +1,553 @@
// +build linux
package intelrdt
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/configs"
)
/*
* About Intel RDT/CAT feature:
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
* Cache is the only resource that is supported in RDT.
*
* This feature provides a way for the software to restrict cache allocation to a
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
* The different subsets are identified by class of service (CLOS) and each CLOS
* has a capacity bitmask (CBM).
*
* For more information about Intel RDT/CAT can be found in the section 17.17
* of Intel Software Developer Manual.
*
* About Intel RDT/CAT kernel interface:
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
* "resource control" filesystem, which is a "cgroup-like" interface.
*
* Comparing with cgroups, it has similar process management lifecycle and
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
* filesystem layout.
*
* Intel RDT "resource control" filesystem hierarchy:
* mount -t resctrl resctrl /sys/fs/resctrl
* tree /sys/fs/resctrl
* /sys/fs/resctrl/
* |-- info
* | |-- L3
* | |-- cbm_mask
* | |-- min_cbm_bits
* | |-- num_closids
* |-- cpus
* |-- schemata
* |-- tasks
* |-- <container_id>
* |-- cpus
* |-- schemata
* |-- tasks
*
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
* resource constraints.
*
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
* <container_id>" group). Tasks can be added to a group by writing the task ID
* to the "tasks" file (which will automatically remove them from the previous
* group to which they belonged). New tasks created by fork(2) and clone(2) are
* added to the same group as their parent. If a pid is not in any sub group, it is
* in root group.
*
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
* which contains L3 cache id and capacity bitmask (CBM).
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
*
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
* be set is less than the max bit. The max bits in the CBM is varied among
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
*
* For more information about Intel RDT/CAT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
* An example for runc:
* Consider a two-socket machine with two L3 caches where the default CBM is
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
* the "lower" 50% L3 cache id 1:
*
* "linux": {
* "intelRdt": {
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
* }
* }
*/
type Manager interface {
// Applies Intel RDT configuration to the process with the specified pid
Apply(pid int) error
// Returns statistics for Intel RDT
GetStats() (*Stats, error)
// Destroys the Intel RDT 'container_id' group
Destroy() error
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
GetPath() string
// Set Intel RDT "resource control" filesystem as configured.
Set(container *configs.Config) error
}
// This implements interface Manager
type IntelRdtManager struct {
mu sync.Mutex
Config *configs.Config
Id string
Path string
}
const (
IntelRdtTasks = "tasks"
)
var (
// The absolute root path of the Intel RDT "resource control" filesystem
intelRdtRoot string
intelRdtRootLock sync.Mutex
// The flag to indicate if Intel RDT is supported
isEnabled bool
)
type intelRdtData struct {
root string
config *configs.Config
pid int
}
// Check if Intel RDT is enabled in init()
func init() {
// 1. Check if hardware and kernel support Intel RDT/CAT feature
// "cat_l3" flag is set if supported
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
if !isFlagSet || err != nil {
isEnabled = false
return
}
// 2. Check if Intel RDT "resource control" filesystem is mounted
// The user guarantees to mount the filesystem
isEnabled = isIntelRdtMounted()
}
// Return the mount point path of Intel RDT "resource control" filesysem
func findIntelRdtMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
text := s.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "Intel RDT"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "resctrl" {
// Check that the mount is properly formated.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return fields[4], nil
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("Intel RDT")
}
// Gets the root path of Intel RDT "resource control" filesystem
func getIntelRdtRoot() (string, error) {
intelRdtRootLock.Lock()
defer intelRdtRootLock.Unlock()
if intelRdtRoot != "" {
return intelRdtRoot, nil
}
root, err := findIntelRdtMountpointDir()
if err != nil {
return "", err
}
if _, err := os.Stat(root); err != nil {
return "", err
}
intelRdtRoot = root
return intelRdtRoot, nil
}
func isIntelRdtMounted() bool {
_, err := getIntelRdtRoot()
if err != nil {
return false
}
return true
}
func parseCpuInfoFile(path string) (bool, error) {
f, err := os.Open(path)
if err != nil {
return false, err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return false, err
}
text := s.Text()
flags := strings.Split(text, " ")
// "cat_l3" flag is set if Intel RDT/CAT is supported
for _, flag := range flags {
if flag == "cat_l3" {
return true, nil
}
}
}
return false, nil
}
func parseUint(s string, base, bitSize int) (uint64, error) {
value, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
intValue, intErr := strconv.ParseInt(s, base, bitSize)
// 1. Handle negative values greater than MinInt64 (and)
// 2. Handle negative values lesser than MinInt64
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
return 0, nil
}
return value, err
}
return value, nil
}
// Gets a single uint64 value from the specified file.
func getIntelRdtParamUint(path, file string) (uint64, error) {
fileName := filepath.Join(path, file)
contents, err := ioutil.ReadFile(fileName)
if err != nil {
return 0, err
}
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
}
return res, nil
}
// Gets a string value from the specified file
func getIntelRdtParamString(path, file string) (string, error) {
contents, err := ioutil.ReadFile(filepath.Join(path, file))
if err != nil {
return "", err
}
return strings.TrimSpace(string(contents)), nil
}
func readTasksFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func writeFile(dir, file, data string) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
return &intelRdtData{
root: rootPath,
config: c,
pid: pid,
}, nil
}
// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
rootPath, err := getIntelRdtRoot()
if err != nil {
return l3CacheInfo, err
}
path := filepath.Join(rootPath, "info", "L3")
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
if err != nil {
return l3CacheInfo, err
}
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
if err != nil {
return l3CacheInfo, err
}
numClosids, err := getIntelRdtParamUint(path, "num_closids")
if err != nil {
return l3CacheInfo, err
}
l3CacheInfo.CbmMask = cbmMask
l3CacheInfo.MinCbmBits = minCbmBits
l3CacheInfo.NumClosids = numClosids
return l3CacheInfo, nil
}
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
func WriteIntelRdtTasks(dir string, pid int) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}
// Dont attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
}
}
return nil
}
// Check if Intel RDT is enabled
func IsEnabled() bool {
return isEnabled
}
// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return "", err
}
path := filepath.Join(rootPath, id)
return path, nil
}
// Applies Intel RDT configuration to the process with the specified pid
func (m *IntelRdtManager) Apply(pid int) (err error) {
// If intelRdt is not specified in config, we do nothing
if m.Config.IntelRdt == nil {
return nil
}
d, err := getIntelRdtData(m.Config, pid)
if err != nil && !IsNotFound(err) {
return err
}
m.mu.Lock()
defer m.mu.Unlock()
path, err := d.join(m.Id)
if err != nil {
return err
}
m.Path = path
return nil
}
// Destroys the Intel RDT 'container_id' group
func (m *IntelRdtManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := os.RemoveAll(m.Path); err != nil {
return err
}
m.Path = ""
return nil
}
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
func (m *IntelRdtManager) GetPath() string {
if m.Path == "" {
m.Path, _ = GetIntelRdtPath(m.Id)
}
return m.Path
}
// Returns statistics for Intel RDT
func (m *IntelRdtManager) GetStats() (*Stats, error) {
// If intelRdt is not specified in config
if m.Config.IntelRdt == nil {
return nil, nil
}
m.mu.Lock()
defer m.mu.Unlock()
stats := NewStats()
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
if err != nil {
return nil, err
}
stats.L3CacheInfo = l3CacheInfo
// The read-only L3 cache schema in root
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
stats.L3CacheSchemaRoot = schemaRootStrings[0]
// The L3 cache schema in 'container_id' group
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaStrings := strings.Split(tmpStrings, "\n")
stats.L3CacheSchema = schemaStrings[0]
return stats, nil
}
// Set Intel RDT "resource control" filesystem as configured.
func (m *IntelRdtManager) Set(container *configs.Config) error {
path := m.GetPath()
// About L3 cache schema file:
// The schema has allocation masks/values for L3 cache on each socket,
// which contains L3 cache id and capacity bitmask (CBM).
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
// For example, on a two-socket machine, L3's schema line could be:
// L3:0=ff;1=c0
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
//
// About L3 cache CBM validity:
// The valid L3 cache CBM is a *contiguous bits set* and number of
// bits that can be set is less than the max bit. The max bits in the
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
// "resource control" filesystem layout, the CBM in a group should
// be a subset of the CBM in root. Kernel will check if it is valid
// when writing.
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
// which mapping to entire L3 cache capacity. Some valid CBM values
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
if container.IntelRdt != nil {
l3CacheSchema := container.IntelRdt.L3CacheSchema
if l3CacheSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
}
}
}
return nil
}
func (raw *intelRdtData) join(id string) (string, error) {
path := filepath.Join(raw.root, id)
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
return "", err
}
return path, nil
}
type NotFoundError struct {
ResourceControl string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
}
func NewNotFoundError(res string) error {
return &NotFoundError{
ResourceControl: res,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View file

@ -0,0 +1,46 @@
// +build linux
package intelrdt
import (
"strings"
"testing"
)
func TestIntelRdtSetL3CacheSchema(t *testing.T) {
if !IsEnabled() {
return
}
helper := NewIntelRdtTestUtil(t)
defer helper.cleanup()
const (
l3CacheSchemaBefore = "L3:0=f;1=f0"
l3CacheSchemeAfter = "L3:0=f0;1=f"
)
helper.writeFileContents(map[string]string{
"schemata": l3CacheSchemaBefore + "\n",
})
helper.IntelRdtData.config.IntelRdt.L3CacheSchema = l3CacheSchemeAfter
intelrdt := &IntelRdtManager{
Config: helper.IntelRdtData.config,
Path: helper.IntelRdtPath,
}
if err := intelrdt.Set(helper.IntelRdtData.config); err != nil {
t.Fatal(err)
}
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
value := values[0]
if value != l3CacheSchemeAfter {
t.Fatal("Got the wrong value, set 'schemata' failed.")
}
}

View file

@ -0,0 +1,24 @@
// +build linux
package intelrdt
type L3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type Stats struct {
// The read-only L3 cache information
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
func NewStats() *Stats {
return &Stats{}
}

View file

@ -0,0 +1,67 @@
// +build linux
/*
* Utility for testing Intel RDT operations.
* Creates a mock of the Intel RDT "resource control" filesystem for the duration of the test.
*/
package intelrdt
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
type intelRdtTestUtil struct {
// intelRdt data to use in tests
IntelRdtData *intelRdtData
// Path to the mock Intel RDT "resource control" filesystem directory
IntelRdtPath string
// Temporary directory to store mock Intel RDT "resource control" filesystem
tempDir string
t *testing.T
}
// Creates a new test util
func NewIntelRdtTestUtil(t *testing.T) *intelRdtTestUtil {
d := &intelRdtData{
config: &configs.Config{
IntelRdt: &configs.IntelRdt{},
},
}
tempDir, err := ioutil.TempDir("", "intelrdt_test")
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testIntelRdtPath := filepath.Join(d.root, "resctrl")
if err != nil {
t.Fatal(err)
}
// Ensure the full mock Intel RDT "resource control" filesystem path exists
err = os.MkdirAll(testIntelRdtPath, 0755)
if err != nil {
t.Fatal(err)
}
return &intelRdtTestUtil{IntelRdtData: d, IntelRdtPath: testIntelRdtPath, tempDir: tempDir, t: t}
}
func (c *intelRdtTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents on the mock of the specified Intel RDT "resource control" files
func (c *intelRdtTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.IntelRdtPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View file

@ -1,35 +1,21 @@
// +build linux
package keyctl
package keys
import (
"fmt"
"strconv"
"strings"
"syscall"
"unsafe"
)
const KEYCTL_JOIN_SESSION_KEYRING = 1
const KEYCTL_SETPERM = 5
const KEYCTL_DESCRIBE = 6
"golang.org/x/sys/unix"
)
type KeySerial uint32
func JoinSessionKeyring(name string) (KeySerial, error) {
var _name *byte
var err error
if len(name) > 0 {
_name, err = syscall.BytePtrFromString(name)
if err != nil {
return KeySerial(0), err
}
}
sessKeyId, _, errn := syscall.Syscall(syscall.SYS_KEYCTL, KEYCTL_JOIN_SESSION_KEYRING, uintptr(unsafe.Pointer(_name)), 0)
if errn != 0 {
return 0, fmt.Errorf("could not create session key: %v", errn)
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
if err != nil {
return 0, fmt.Errorf("could not create session key: %v", err)
}
return KeySerial(sessKeyId), nil
}
@ -38,14 +24,12 @@ func JoinSessionKeyring(name string) (KeySerial, error) {
// anding the bits with the given mask (clearing permissions) and setting
// additional permission bits
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
dest := make([]byte, 1024)
destBytes := unsafe.Pointer(&dest[0])
if _, _, err := syscall.Syscall6(syscall.SYS_KEYCTL, uintptr(KEYCTL_DESCRIBE), uintptr(ringId), uintptr(destBytes), uintptr(len(dest)), 0, 0); err != 0 {
dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
if err != nil {
return err
}
res := strings.Split(string(dest), ";")
res := strings.Split(dest, ";")
if len(res) < 5 {
return fmt.Errorf("Destination buffer for key description is too small")
}
@ -58,7 +42,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
perm := (uint32(perm64) & mask) | setbits
if _, _, err := syscall.Syscall(syscall.SYS_KEYCTL, uintptr(KEYCTL_SETPERM), uintptr(ringId), uintptr(perm)); err != 0 {
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
return err
}

View file

@ -1,80 +0,0 @@
// +build !selinux !linux
package label
// InitLabels returns the process label and file labels to be used within
// the container. A list of options can be passed into this function to alter
// the labels.
func InitLabels(options []string) (string, string, error) {
return "", "", nil
}
func GenLabels(options string) (string, string, error) {
return "", "", nil
}
func FormatMountLabel(src string, mountLabel string) string {
return src
}
func SetProcessLabel(processLabel string) error {
return nil
}
func GetFileLabel(path string) (string, error) {
return "", nil
}
func SetFileLabel(path string, fileLabel string) error {
return nil
}
func SetFileCreateLabel(fileLabel string) error {
return nil
}
func Relabel(path string, fileLabel string, shared bool) error {
return nil
}
func GetPidLabel(pid int) (string, error) {
return "", nil
}
func Init() {
}
func ReserveLabel(label string) error {
return nil
}
func UnreserveLabel(label string) error {
return nil
}
// DupSecOpt takes a process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return nil
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return nil
}
// Validate checks that the label does not include unexpected options
func Validate(label string) error {
return nil
}
// RelabelNeeded checks whether the user requested a relabel
func RelabelNeeded(label string) bool {
return false
}
// IsShared checks that the label includes a "shared" mark
func IsShared(label string) bool {
return false
}

View file

@ -1,197 +0,0 @@
// +build selinux,linux
package label
import (
"fmt"
"strings"
"github.com/opencontainers/runc/libcontainer/selinux"
)
// Valid Label Options
var validOptions = map[string]bool{
"disable": true,
"type": true,
"user": true,
"role": true,
"level": true,
}
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
// InitLabels returns the process label and file labels to be used within
// the container. A list of options can be passed into this function to alter
// the labels. The labels returned will include a random MCS String, that is
// guaranteed to be unique.
func InitLabels(options []string) (string, string, error) {
if !selinux.SelinuxEnabled() {
return "", "", nil
}
processLabel, mountLabel := selinux.GetLxcContexts()
if processLabel != "" {
pcon := selinux.NewContext(processLabel)
mcon := selinux.NewContext(mountLabel)
for _, opt := range options {
if opt == "disable" {
return "", "", nil
}
if i := strings.Index(opt, ":"); i == -1 {
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
}
con := strings.SplitN(opt, ":", 2)
if !validOptions[con[0]] {
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0])
}
pcon[con[0]] = con[1]
if con[0] == "level" || con[0] == "user" {
mcon[con[0]] = con[1]
}
}
processLabel = pcon.Get()
mountLabel = mcon.Get()
}
return processLabel, mountLabel, nil
}
// DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
func GenLabels(options string) (string, string, error) {
return InitLabels(strings.Fields(options))
}
// FormatMountLabel returns a string to be used by the mount command.
// The format of this string will be used to alter the labeling of the mountpoint.
// The string returned is suitable to be used as the options field of the mount command.
// If you need to have additional mount point options, you can pass them in as
// the first parameter. Second parameter is the label that you wish to apply
// to all content in the mount point.
func FormatMountLabel(src, mountLabel string) string {
if mountLabel != "" {
switch src {
case "":
src = fmt.Sprintf("context=%q", mountLabel)
default:
src = fmt.Sprintf("%s,context=%q", src, mountLabel)
}
}
return src
}
// SetProcessLabel takes a process label and tells the kernel to assign the
// label to the next program executed by the current process.
func SetProcessLabel(processLabel string) error {
if processLabel == "" {
return nil
}
return selinux.Setexeccon(processLabel)
}
// GetProcessLabel returns the process label that the kernel will assign
// to the next program executed by the current process. If "" is returned
// this indicates that the default labeling will happen for the process.
func GetProcessLabel() (string, error) {
return selinux.Getexeccon()
}
// GetFileLabel returns the label for specified path
func GetFileLabel(path string) (string, error) {
return selinux.Getfilecon(path)
}
// SetFileLabel modifies the "path" label to the specified file label
func SetFileLabel(path string, fileLabel string) error {
if selinux.SelinuxEnabled() && fileLabel != "" {
return selinux.Setfilecon(path, fileLabel)
}
return nil
}
// SetFileCreateLabel tells the kernel the label for all files to be created
func SetFileCreateLabel(fileLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setfscreatecon(fileLabel)
}
return nil
}
// Relabel changes the label of path to the filelabel string.
// It changes the MCS label to s0 if shared is true.
// This will allow all containers to share the content.
func Relabel(path string, fileLabel string, shared bool) error {
if !selinux.SelinuxEnabled() {
return nil
}
if fileLabel == "" {
return nil
}
exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true}
if exclude_paths[path] {
return fmt.Errorf("Relabeling of %s is not allowed", path)
}
if shared {
c := selinux.NewContext(fileLabel)
c["level"] = "s0"
fileLabel = c.Get()
}
return selinux.Chcon(path, fileLabel, true)
}
// GetPidLabel will return the label of the process running with the specified pid
func GetPidLabel(pid int) (string, error) {
return selinux.Getpidcon(pid)
}
// Init initialises the labeling system
func Init() {
selinux.SelinuxEnabled()
}
// ReserveLabel will record the fact that the MCS label has already been used.
// This will prevent InitLabels from using the MCS label in a newly created
// container
func ReserveLabel(label string) error {
selinux.ReserveLabel(label)
return nil
}
// UnreserveLabel will remove the reservation of the MCS label.
// This will allow InitLabels to use the MCS label in a newly created
// containers
func UnreserveLabel(label string) error {
selinux.FreeLxcContexts(label)
return nil
}
// DupSecOpt takes an process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return selinux.DupSecOpt(src)
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return selinux.DisableSecOpt()
}
// Validate checks that the label does not include unexpected options
func Validate(label string) error {
if strings.Contains(label, "z") && strings.Contains(label, "Z") {
return ErrIncompatibleLabel
}
return nil
}
// RelabelNeeded checks whether the user requested a relabel
func RelabelNeeded(label string) bool {
return strings.Contains(label, "z") || strings.Contains(label, "Z")
}
// IsShared checks that the label includes a "shared" mark
func IsShared(label string) bool {
return strings.Contains(label, "z")
}

View file

@ -3,9 +3,8 @@
package libcontainer
import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// list of known message types we want to send to bootstrap program
@ -13,13 +12,14 @@ import (
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
ConsolePathAttr uint16 = 27282
NsPathsAttr uint16 = 27283
UidmapAttr uint16 = 27284
GidmapAttr uint16 = 27285
SetgroupAttr uint16 = 27286
// When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
)
type Int32msg struct {
@ -41,7 +41,7 @@ func (msg *Int32msg) Serialize() []byte {
}
func (msg *Int32msg) Len() int {
return syscall_NLA_HDRLEN + 4
return unix.NLA_HDRLEN + 4
}
// Bytemsg has the following representation
@ -54,7 +54,7 @@ type Bytemsg struct {
func (msg *Bytemsg) Serialize() []byte {
l := msg.Len()
buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1))
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(l))
native.PutUint16(buf[2:4], msg.Type)
@ -63,7 +63,7 @@ func (msg *Bytemsg) Serialize() []byte {
}
func (msg *Bytemsg) Len() int {
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
}
type Boolmsg struct {
@ -85,5 +85,5 @@ func (msg *Boolmsg) Serialize() []byte {
}
func (msg *Boolmsg) Len() int {
return syscall_NLA_HDRLEN + 1
return unix.NLA_HDRLEN + 1
}

View file

@ -0,0 +1,23 @@
package mount
// GetMounts retrieves a list of mounts for the current running process.
func GetMounts() ([]*Info, error) {
return parseMountTable()
}
// Mounted looks at /proc/self/mountinfo to determine of the specified
// mountpoint has been mounted
func Mounted(mountpoint string) (bool, error) {
entries, err := parseMountTable()
if err != nil {
return false, err
}
// Search the table for the mountpoint
for _, e := range entries {
if e.Mountpoint == mountpoint {
return true, nil
}
}
return false, nil
}

View file

@ -0,0 +1,82 @@
// +build linux
package mount
import (
"bufio"
"fmt"
"io"
"os"
"strings"
)
const (
/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
(1) mount ID: unique identifier of the mount (may be reused after umount)
(2) parent ID: ID of parent (or of self for the top of the mount tree)
(3) major:minor: value of st_dev for files on filesystem
(4) root: root of the mount within the filesystem
(5) mount point: mount point relative to the process's root
(6) mount options: per mount options
(7) optional fields: zero or more fields of the form "tag[:value]"
(8) separator: marks the end of the optional fields
(9) filesystem type: name of filesystem of the form "type[.subtype]"
(10) mount source: filesystem specific information or "none"
(11) super options: per super block options*/
mountinfoFormat = "%d %d %d:%d %s %s %s %s"
)
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
func parseMountTable() ([]*Info, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
return parseInfoFile(f)
}
func parseInfoFile(r io.Reader) ([]*Info, error) {
var (
s = bufio.NewScanner(r)
out = []*Info{}
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
var (
p = &Info{}
text = s.Text()
optionalFields string
)
if _, err := fmt.Sscanf(text, mountinfoFormat,
&p.ID, &p.Parent, &p.Major, &p.Minor,
&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
}
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
if len(postSeparatorFields) < 3 {
return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
if optionalFields != "-" {
p.Optional = optionalFields
}
p.Fstype = postSeparatorFields[0]
p.Source = postSeparatorFields[1]
p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
out = append(out, p)
}
return out, nil
}

View file

@ -0,0 +1,40 @@
package mount
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
type Info struct {
// ID is a unique identifier of the mount (may be reused after umount).
ID int
// Parent indicates the ID of the mount parent (or of self for the top of the
// mount tree).
Parent int
// Major indicates one half of the device ID which identifies the device class.
Major int
// Minor indicates one half of the device ID which identifies a specific
// instance of device.
Minor int
// Root of the mount within the filesystem.
Root string
// Mountpoint indicates the mount point relative to the process's root.
Mountpoint string
// Opts represents mount-specific options.
Opts string
// Optional represents optional fields.
Optional string
// Fstype indicates the type of filesystem, such as EXT3.
Fstype string
// Source indicates filesystem specific information or "none".
Source string
// VfsOpts represents per super block options.
VfsOpts string
}

View file

@ -7,7 +7,8 @@ import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"golang.org/x/sys/unix"
)
const oomCgroupName = "memory"
@ -25,13 +26,13 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
evFile.Close()
return nil, syserr
return nil, err
}
eventfd := os.NewFile(fd, "eventfd")
eventfd := os.NewFile(uintptr(fd), "eventfd")
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
@ -43,9 +44,9 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
evFile.Close()
close(ch)
}()
buf := make([]byte, 8)
for {

View file

@ -0,0 +1,126 @@
// +build linux
package libcontainer
import (
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"golang.org/x/sys/unix"
)
type notifyFunc func(paths map[string]string) (<-chan struct{}, error)
func testMemoryNotification(t *testing.T, evName string, notify notifyFunc, targ string) {
memoryPath, err := ioutil.TempDir("", "testmemnotification-"+evName)
if err != nil {
t.Fatal(err)
}
evFile := filepath.Join(memoryPath, evName)
eventPath := filepath.Join(memoryPath, "cgroup.event_control")
if err := ioutil.WriteFile(evFile, []byte{}, 0700); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
paths := map[string]string{
"memory": memoryPath,
}
ch, err := notify(paths)
if err != nil {
t.Fatal("expected no error, got:", err)
}
data, err := ioutil.ReadFile(eventPath)
if err != nil {
t.Fatal("couldn't read event control file:", err)
}
var eventFd, evFd int
var arg string
if targ != "" {
_, err = fmt.Sscanf(string(data), "%d %d %s", &eventFd, &evFd, &arg)
} else {
_, err = fmt.Sscanf(string(data), "%d %d", &eventFd, &evFd)
}
if err != nil || arg != targ {
t.Fatalf("invalid control data %q: %s", data, err)
}
// dup the eventfd
efd, err := unix.Dup(eventFd)
if err != nil {
t.Fatal("unable to dup eventfd:", err)
}
defer unix.Close(efd)
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, 1)
if _, err := unix.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
select {
case <-ch:
case <-time.After(100 * time.Millisecond):
t.Fatal("no notification on channel after 100ms")
}
// simulate what happens when a cgroup is destroyed by cleaning up and then
// writing to the eventfd.
if err := os.RemoveAll(memoryPath); err != nil {
t.Fatal(err)
}
if _, err := unix.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
// give things a moment to shut down
select {
case _, ok := <-ch:
if ok {
t.Fatal("expected no notification to be triggered")
}
case <-time.After(100 * time.Millisecond):
t.Fatal("channel not closed after 100ms")
}
if _, _, err := unix.Syscall(unix.SYS_FCNTL, uintptr(evFd), unix.F_GETFD, 0); err != unix.EBADF {
t.Errorf("expected event control to be closed, but received error %s", err.Error())
}
if _, _, err := unix.Syscall(unix.SYS_FCNTL, uintptr(eventFd), unix.F_GETFD, 0); err != unix.EBADF {
t.Errorf("expected event fd to be closed, but received error %s", err.Error())
}
}
func TestNotifyOnOOM(t *testing.T) {
f := func(paths map[string]string) (<-chan struct{}, error) {
return notifyOnOOM(paths)
}
testMemoryNotification(t, "memory.oom_control", f, "")
}
func TestNotifyMemoryPressure(t *testing.T) {
tests := map[PressureLevel]string{
LowPressure: "low",
MediumPressure: "medium",
CriticalPressure: "critical",
}
for level, arg := range tests {
f := func(paths map[string]string) (<-chan struct{}, error) {
return notifyMemoryPressure(paths, level)
}
testMemoryNotification(t, "memory.pressure_level", f, arg)
}
}

Some files were not shown because too many files have changed in this diff Show more