update vendor

Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
Jess Frazelle 2018-03-19 21:36:34 -04:00
parent 7a437ada25
commit 639756e8c6
4300 changed files with 824810 additions and 9292 deletions

5
vendor/github.com/opencontainers/runc/.gitignore generated vendored Normal file

@ -0,0 +1,5 @@
vendor/pkg
/runc
contrib/cmd/recvtty/recvtty
man/man8
release

10
vendor/github.com/opencontainers/runc/.pullapprove.yml generated vendored Normal file

@ -0,0 +1,10 @@
approve_by_comment: true
approve_regex: ^LGTM
reject_regex: ^Rejected
reset_on_push: true
author_approval: ignored
reviewers:
teams:
- runc-maintainers
name: default
required: 2

35
vendor/github.com/opencontainers/runc/.travis.yml generated vendored Normal file

@ -0,0 +1,35 @@
language: go
go:
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- tip
matrix:
allow_failures:
- go: tip
go_import_path: github.com/opencontainers/runc
# `make ci` uses Docker.
sudo: required
services:
- docker
env:
global:
- BUILDTAGS="seccomp apparmor selinux ambient"
before_install:
- echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list
- sudo apt-get -qq update
- sudo apt-get install -y libseccomp-dev/trusty-backports
- go get -u github.com/golang/lint/golint
- go get -u github.com/vbatts/git-validation
- env | grep TRAVIS_
script:
- git-validation -run DCO,short-subject -v
- make BUILDTAGS="${BUILDTAGS}"
- make BUILDTAGS="${BUILDTAGS}" clean ci

124
vendor/github.com/opencontainers/runc/CONTRIBUTING.md generated vendored Normal file

@ -0,0 +1,124 @@
## Contribution Guidelines
### Security issues
If you are reporting a security issue, do not create an issue or file a pull
request on GitHub. Instead, disclose the issue responsibly by sending an email
to security@opencontainers.org (which is inhabited only by the maintainers of
the various OCI projects).
### Pull requests are always welcome
We are always thrilled to receive pull requests, and do our best to
process them as fast as possible. Not sure if that typo is worth a pull
request? Do it! We will appreciate it.
If your pull request is not accepted on the first try, don't be
discouraged! If there's a problem with the implementation, hopefully you
received feedback on what to improve.
We're trying very hard to keep runc lean and focused. We don't want it
to do everything for everybody. This means that we might decide against
incorporating a new feature. However, there might be a way to implement
that feature *on top of* runc.
### Conventions
Fork the repo and make changes on your fork in a feature branch:
- If it's a bugfix branch, name it XXX-something where XXX is the number of the
issue
- If it's a feature branch, create an enhancement issue to announce your
intentions, and name it XXX-something where XXX is the number of the issue.
Submit unit tests for your changes. Go has a great test framework built in; use
it! Take a look at existing tests for inspiration. Run the full test suite on
your branch before submitting a pull request.
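For example, a pre-submission pass might look like the following sketch. It leans on the `validate` and `localunittest` targets defined in this repository's Makefile plus a local Go toolchain; adjust it to your own setup.
```bash
# Sketch of a local pre-submission check (assumes the `validate` and
# `localunittest` targets from this repository's Makefile and a Go toolchain).
gofmt -s -w file.go      # format each file you changed (as described below)
make validate            # style checks: gofmt, go vet, C formatting
make localunittest       # run the unit tests locally
```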
Update the documentation when creating or modifying features. Test
your documentation changes for clarity, concision, and correctness, as
well as a clean documentation build. See ``docs/README.md`` for more
information on building the docs and how docs get released.
Write clean code. Universally formatted code promotes ease of writing, reading,
and maintenance. Always run `gofmt -s -w file.go` on each changed file before
committing your changes. Most editors have plugins that do this automatically.
Pull request descriptions should be as clear as possible and include a
reference to all the issues that they address.
Pull requests must not contain commits from other users or branches.
Commit messages must start with a capitalized and short summary (max. 50
chars) written in the imperative, followed by an optional, more detailed
explanatory text which is separated from the summary by an empty line.
Code review comments may be added to your pull request. Discuss, then make the
suggested modifications and push additional commits to your feature branch. Be
sure to post a comment after pushing. The new commits will show up in the pull
request automatically, but the reviewers will not be notified unless you
comment.
Before the pull request is merged, make sure that you squash your commits into
logical units of work using `git rebase -i` and `git push -f`. After every
commit the test suite should be passing. Include documentation changes in the
same commit so that a revert would remove all traces of the feature or fix.
Commits that fix or close an issue should include a reference like `Closes #XXX`
or `Fixes #XXX`, which will automatically close the issue when merged.
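As an illustration, a commit created along these lines might look like the sketch below; the summary, body text, and issue number are placeholders, and `-s` adds the sign-off described in the next section.
```bash
# Hypothetical example only: imperative summary under 50 characters, a blank
# line, an explanatory body, an issue reference, and a DCO sign-off (-s).
git commit -s \
  -m 'Fix typo in checkpoint usage text' \
  -m 'Correct the wording of the checkpoint command help so it matches the
other subcommands.

Closes #XXX'
```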
### Sign your work
The sign-off is a simple line at the end of the explanation for the
patch, which certifies that you wrote it or otherwise have the right to
pass it on as an open-source patch. The rules are pretty simple: if you
can certify the below (from
[developercertificate.org](http://developercertificate.org/)):
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
then you just add a line to every git commit message:
Signed-off-by: Joe Smith <joe@gmail.com>
using your real name (sorry, no pseudonyms or anonymous contributions).
You can add the sign off when creating the git commit via `git commit -s`.

59
vendor/github.com/opencontainers/runc/Dockerfile generated vendored Normal file

@ -0,0 +1,59 @@
FROM golang:1.10-stretch
RUN apt-get update && apt-get install -y \
build-essential \
curl \
sudo \
gawk \
iptables \
jq \
pkg-config \
libaio-dev \
libcap-dev \
libprotobuf-dev \
libprotobuf-c0-dev \
libnl-3-dev \
libnet-dev \
libseccomp2 \
libseccomp-dev \
protobuf-c-compiler \
protobuf-compiler \
python-minimal \
uidmap \
--no-install-recommends \
&& apt-get clean
# Add a dummy user for the rootless integration tests. While runC does
# not require an entry in /etc/passwd to operate, one of the tests uses
# `git clone` -- and `git clone` does not allow you to clone a
# repository if the current uid does not have an entry in /etc/passwd.
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
# install bats
RUN cd /tmp \
&& git clone https://github.com/sstephenson/bats.git \
&& cd bats \
&& git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \
&& ./install.sh /usr/local \
&& rm -rf /tmp/bats
# install criu
ENV CRIU_VERSION v3.7
RUN mkdir -p /usr/src/criu \
&& curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
&& cd /usr/src/criu \
&& make install-criu \
&& rm -rf /usr/src/criu
# setup a playground for us to spawn containers in
ENV ROOTFS /busybox
RUN mkdir -p ${ROOTFS}
COPY script/tmpmount /
WORKDIR /go/src/github.com/opencontainers/runc
ENTRYPOINT ["/tmpmount"]
ADD . /go/src/github.com/opencontainers/runc
RUN . tests/integration/multi-arch.bash \
&& curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS}

7
vendor/github.com/opencontainers/runc/MAINTAINERS generated vendored Normal file

@ -0,0 +1,7 @@
Michael Crosby <michael@docker.com> (@crosbymichael)
Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
Aleksa Sarai <asarai@suse.de> (@cyphar)

120
vendor/github.com/opencontainers/runc/MAINTAINERS_GUIDE.md generated vendored Normal file

@ -0,0 +1,120 @@
## Introduction
Dear maintainer. Thank you for investing the time and energy to help
make runc as useful as possible. Maintaining a project is difficult,
sometimes unrewarding work. Sure, you will get to contribute cool
features to the project. But most of your time will be spent reviewing,
cleaning up, documenting, answering questions, justifying design
decisions - while everyone has all the fun! But remember - the quality
of the maintainers' work is what distinguishes the good projects from the
great. So please be proud of your work, even the unglamorous parts,
and encourage a culture of appreciation and respect for *every* aspect
of improving the project - not just the hot new features.
This document is a manual for maintainers old and new. It explains what
is expected of maintainers, how they should work, and what tools are
available to them.
This is a living document - if you see something out of date or missing,
speak up!
## What are a maintainer's responsibilities?
It is every maintainer's responsibility to:
* 1) Expose a clear roadmap for improving their component.
* 2) Deliver prompt feedback and decisions on pull requests.
* 3) Be available to anyone with questions, bug reports, criticism etc.
on their component. This includes IRC and GitHub issues and pull requests.
* 4) Make sure their component respects the philosophy, design and
roadmap of the project.
## How are decisions made?
Short answer: with pull requests to the runc repository.
runc is an open-source project with an open design philosophy. This
means that the repository is the source of truth for EVERY aspect of the
project, including its philosophy, design, roadmap and APIs. *If it's
part of the project, it's in the repo. If it's in the repo, it's part of
the project.*
As a result, all decisions can be expressed as changes to the
repository. An implementation change is a change to the source code. An
API change is a change to the API specification. A philosophy change is
a change to the philosophy manifesto. And so on.
All decisions affecting runc, big and small, follow the same 3 steps:
* Step 1: Open a pull request. Anyone can do this.
* Step 2: Discuss the pull request. Anyone can do this.
* Step 3: Accept (`LGTM`) or refuse a pull request. The relevant maintainers do
this (see below "Who decides what?")
*I'm a maintainer, should I make pull requests too?*
Yes. Nobody should ever push to master directly. All changes should be
made through a pull request.
## Who decides what?
All decisions are pull requests, and the relevant maintainers make
decisions by accepting or refusing the pull request. Review and acceptance
by anyone is denoted by adding a comment in the pull request: `LGTM`.
However, only currently listed `MAINTAINERS` are counted towards the required
two LGTMs.
Overall the maintainer system works because of mutual respect across the
maintainers of the project. The maintainers trust one another to make decisions
in the best interests of the project. Sometimes maintainers can disagree and
this is part of a healthy project to represent the points of view of various people.
In the case where maintainers cannot find agreement on a specific change the
role of a Chief Maintainer comes into play.
The Chief Maintainer for the project is responsible for overall architecture
of the project to maintain conceptual integrity. Large decisions and
architecture changes should be reviewed by the chief maintainer.
The current chief maintainer for the project is Michael Crosby (@crosbymichael).
Even though the maintainer system is built on trust, if there is a conflict
with the chief maintainer on a decision, their decision can be challenged
and brought to the technical oversight board if two-thirds of the
maintainers vote for an appeal. It is expected that this would be a
very exceptional event.
### How are maintainers added?
The best maintainers have a vested interest in the project. Maintainers
are first and foremost contributors that have shown they are committed to
the long term success of the project. Contributors wanting to become
maintainers are expected to be deeply involved in contributing code,
pull request review, and triage of issues in the project for more than two months.
Just contributing does not make you a maintainer, it is about building trust
with the current maintainers of the project and being a person that they can
depend on and trust to make decisions in the best interest of the project. The
final vote to add a new maintainer should be approved by over 66% of the current
maintainers with the chief maintainer having veto power. In case of a veto,
conflict resolution rules expressed above apply. The voting period is
five business days on the Pull Request to add the new maintainer.
### What is expected of maintainers?
Part of a healthy project is to have active maintainers to support the community
in contributions and perform tasks to keep the project running. Maintainers are
expected to be able to respond in a timely manner if their help is required on specific
issues where they are pinged. Being a maintainer is a time consuming commitment and should
not be taken lightly.
When a maintainer is unable to perform the required duties they can be removed with
a vote by 66% of the current maintainers with the chief maintainer having veto power.
The voting period is ten business days. Issues related to a maintainer's performance should
be discussed with them among the other maintainers so that they are not surprised by
a pull request removing them.

122
vendor/github.com/opencontainers/runc/Makefile generated vendored Normal file

@ -0,0 +1,122 @@
.PHONY: all shell dbuild man release \
localtest localunittest localintegration \
test unittest integration
GO := go
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
PREFIX := $(DESTDIR)/usr/local
BINDIR := $(PREFIX)/sbin
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
PROJECT := github.com/opencontainers/runc
BUILDTAGS := seccomp
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
MAN_DIR := $(CURDIR)/man/man8
MAN_PAGES = $(shell ls $(MAN_DIR)/*.8)
MAN_PAGES_BASE = $(notdir $(MAN_PAGES))
MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/
RELEASE_DIR := $(CURDIR)/release
VERSION := ${shell cat ./VERSION}
SHELL := $(shell command -v bash 2>/dev/null)
.DEFAULT: runc
runc: $(SOURCES)
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc .
all: runc recvtty
recvtty: contrib/cmd/recvtty/recvtty
contrib/cmd/recvtty/recvtty: $(SOURCES)
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
static: $(SOURCES)
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo cgo static_build" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo cgo static_build" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
release:
script/release.sh -r release/$(VERSION) -v $(VERSION)
dbuild: runcimage
docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
lint:
$(GO) vet $(allpackages)
$(GO) fmt $(allpackages)
man:
man/md2man-all.sh
runcimage:
docker build -t $(RUNC_IMAGE) .
test:
make unittest integration rootlessintegration
localtest:
make localunittest localintegration localrootlessintegration
unittest: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest
localunittest: all
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
integration: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration
localintegration: all
bats -t tests/integration${TESTFLAGS}
rootlessintegration: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
localrootlessintegration: all
tests/rootless.sh
shell: runcimage
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
install:
install -D -m0755 runc $(BINDIR)/runc
install-bash:
install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc
install-man:
install -d -m 755 $(MAN_INSTALL_PATH)
install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH)
uninstall:
rm -f $(BINDIR)/runc
uninstall-bash:
rm -f $(PREFIX)/share/bash-completion/completions/runc
uninstall-man:
rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE))
clean:
rm -f runc
rm -f contrib/cmd/recvtty/recvtty
rm -rf $(RELEASE_DIR)
rm -rf $(MAN_DIR)
validate:
script/validate-gofmt
script/validate-c
$(GO) vet $(allpackages)
ci: validate test release
# memoize allpackages, so that it's executed only once and only if used
_allpackages = $(shell $(GO) list ./... | grep -v vendor)
allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)

17
vendor/github.com/opencontainers/runc/NOTICE generated vendored Normal file

@ -0,0 +1,17 @@
runc
Copyright 2012-2015 Docker, Inc.
This product includes software developed at Docker, Inc. (http://www.docker.com).
The following is courtesy of our legal counsel:
Use and transfer of Docker may be subject to certain restrictions by the
United States and other governments.
It is your responsibility to ensure that your use and/or transfer does not
violate applicable laws.
For more information, please see http://www.bis.doc.gov
See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.

19
vendor/github.com/opencontainers/runc/PRINCIPLES.md generated vendored Normal file

@ -0,0 +1,19 @@
# runc principles
In the design and development of runc and libcontainer we try to follow these principles:
(Work in progress)
* Don't try to replace every tool. Instead, be an ingredient to improve them.
* Less code is better.
* Fewer components are better. Do you really need to add one more class?
* 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand.
* Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code.
* When hesitating between two options, choose the one that is easier to reverse.
* "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later.
* Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable.
* The fewer moving parts in a container, the better.
* Don't merge it unless you document it.
* Don't document it unless you can keep it up-to-date.
* Don't merge it unless you test it!
* Everyone's problem is slightly different. Focus on the part that is the same for everyone, and solve that.

253
vendor/github.com/opencontainers/runc/README.md generated vendored Normal file

@ -0,0 +1,253 @@
# runc
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
## Introduction
`runc` is a CLI tool for spawning and running containers according to the OCI specification.
## Releases
`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository.
We will try to make sure that `runc` and the OCI specification major versions stay in lockstep.
This means that `runc` 1.0.0 should implement the 1.0 version of the specification.
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
### Security
If you wish to report a security issue, please disclose the issue responsibly
to security@opencontainers.org.
## Building
`runc` currently supports the Linux platform on a variety of architectures.
It must be built with Go version 1.6 or higher in order for some features to function properly.
In order to enable seccomp support you will need to install `libseccomp` on your platform.
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
```bash
# create a 'github.com/opencontainers' directory in your GOPATH/src
cd github.com/opencontainers
git clone https://github.com/opencontainers/runc
cd runc
make
sudo make install
```
You can also use `go get` to install to your `GOPATH`, assuming that you have a `github.com` parent folder already created under `src`:
```bash
go get github.com/opencontainers/runc
cd $GOPATH/src/github.com/opencontainers/runc
make
sudo make install
```
`runc` will be installed to `/usr/local/sbin/runc` on your system.
#### Build Tags
`runc` supports optional build tags for compiling support of various features.
To add build tags to the make option the `BUILDTAGS` variable must be set.
```bash
make BUILDTAGS='seccomp apparmor'
```
| Build Tag | Feature | Dependency |
|-----------|------------------------------------|-------------|
| seccomp | Syscall filtering | libseccomp |
| selinux | selinux process and mount labeling | <none> |
| apparmor | apparmor profile support | <none> |
| ambient | ambient capability support | kernel 4.3 |
### Running the test suite
`runc` currently supports running its test suite via Docker.
To run the suite just type `make test`.
```bash
make test
```
There are additional make targets for running the tests outside of a container, but this is not recommended as the tests are written with the expectation that they can write to and remove files anywhere on the filesystem.
You can run a specific test case by setting the `TESTFLAGS` variable.
```bash
# make test TESTFLAGS="-run=SomeTestFunction"
```
### Dependencies Management
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
new dependencies.
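As a rough sketch of that workflow (this assumes the usual `vndr` layout, with dependencies pinned in a `vendor.conf` file at the repository root; see the vndr documentation for the authoritative details):
```bash
# Rough sketch of the vndr workflow; the package name and revision below are
# placeholders. vendor.conf lines have the form:
#   <import path> <commit-or-tag> [<alternate repository URL>]
go get github.com/LK4D4/vndr       # install the tool into your GOPATH

# 1. Add or bump the dependency's line in vendor.conf, e.g.:
#      github.com/example/somedep <commit-or-tag>
# 2. Re-vendor just that package:
vndr github.com/example/somedep

# Or re-vendor everything listed in vendor.conf:
vndr
```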
## Using runc
### Creating an OCI Bundle
In order to use runc you must have your container in the format of an OCI bundle.
If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container.
```bash
# create the top most bundle directory
mkdir /mycontainer
cd /mycontainer
# create the rootfs directory
mkdir rootfs
# export busybox via Docker into the rootfs directory
docker export $(docker create busybox) | tar -C rootfs -xvf -
```
After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle.
`runc` provides a `spec` command to generate a base template spec that you are then able to edit.
To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository.
```bash
runc spec
```
### Running Containers
Assuming you have an OCI bundle from the previous step you can execute the container in two different ways.
The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits.
```bash
# run as root
cd /mycontainer
runc run mycontainerid
```
If you used the unmodified `runc spec` template this should give you a `sh` session inside the container.
The second way to start a container is to use the spec's lifecycle operations.
This gives you more power over how the container is created and managed while it is running.
This also launches the container in the background, so for the simple examples here you will have to edit the `config.json` to remove the `terminal` setting.
The process field in your `config.json` should look like the example below, with `"terminal": false` and `"args": ["sleep", "5"]`.
```json
"process": {
"terminal": false,
"user": {
"uid": 0,
"gid": 0
},
"args": [
"sleep", "5"
],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
"cwd": "/",
"capabilities": {
"bounding": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"effective": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"inheritable": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"permitted": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"ambient": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
]
},
"rlimits": [
{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}
],
"noNewPrivileges": true
},
```
Now we can go through the lifecycle operations in your shell.
```bash
# run as root
cd /mycontainer
runc create mycontainerid
# view the container is created and in the "created" state
runc list
# start the process inside the container
runc start mycontainerid
# after 5 seconds view that the container has exited and is now in the stopped state
runc list
# now delete the container
runc delete mycontainerid
```
This allows higher-level systems to augment the container's creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`.
#### Rootless containers
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers; see the example below and compare it with the previous example. Run the following commands as an ordinary user:
```bash
# Same as the first example
mkdir ~/mycontainer
cd ~/mycontainer
mkdir rootfs
docker export $(docker create busybox) | tar -C rootfs -xvf -
# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user.
runc spec --rootless
# The --root parameter tells runc where to store the container state. It must be writable by the user.
runc --root /tmp/runc run mycontainerid
```
#### Supervisors
`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit.
An example systemd unit file looks something like this.
```systemd
[Unit]
Description=Start My Container
[Service]
Type=forking
ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid
ExecStopPost=/usr/local/sbin/runc delete mycontainerid
WorkingDirectory=/mycontainer
PIDFile=/run/mycontainerid.pid
[Install]
WantedBy=multi-user.target
```

1
vendor/github.com/opencontainers/runc/VERSION generated vendored Normal file

@ -0,0 +1 @@
1.0.0-rc5+dev

136
vendor/github.com/opencontainers/runc/checkpoint.go generated vendored Normal file

@ -0,0 +1,136 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
"golang.org/x/sys/unix"
)
var checkpointCommand = cli.Command{
Name: "checkpoint",
Usage: "checkpoint a running container",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container to be
checkpointed.`,
Description: `The checkpoint command saves the state of the container instance.`,
Flags: []cli.Flag{
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc checkpoint requires root")
}
container, err := getContainer(context)
if err != nil {
return err
}
status, err := container.Status()
if err != nil {
return err
}
if status == libcontainer.Created || status == libcontainer.Stopped {
fatalf("Container cannot be checkpointed in %s state", status.String())
}
defer destroy(container)
options := criuOptions(context)
// these are the mandatory criu options for a container
setPageServer(context, options)
setManageCgroupsMode(context, options)
if err := setEmptyNsMask(context, options); err != nil {
return err
}
if err := container.Checkpoint(options); err != nil {
return err
}
return nil
},
}
func getCheckpointImagePath(context *cli.Context) string {
imagePath := context.String("image-path")
if imagePath == "" {
imagePath = getDefaultImagePath(context)
}
return imagePath
}
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
if psOpt := context.String("page-server"); psOpt != "" {
addressPort := strings.Split(psOpt, ":")
if len(addressPort) != 2 {
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
}
portInt, err := strconv.Atoi(addressPort[1])
if err != nil {
fatal(fmt.Errorf("Invalid port number"))
}
options.PageServer = libcontainer.CriuPageServerInfo{
Address: addressPort[0],
Port: int32(portInt),
}
}
}
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
switch cgOpt {
case "soft":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
case "full":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
case "strict":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
default:
fatal(fmt.Errorf("Invalid manage cgroups mode"))
}
}
}
var namespaceMapping = map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: unix.CLONE_NEWNET,
}
func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
var nsmask int
for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return fmt.Errorf("namespace %q is not supported", ns)
}
nsmask |= f
}
options.EmptyNs = uint32(nsmask)
return nil
}

238
vendor/github.com/opencontainers/runc/contrib/cmd/recvtty/recvtty.go generated vendored Normal file

@ -0,0 +1,238 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strings"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli"
)
// version will be populated by the Makefile, read from
// VERSION file of the source code.
var version = ""
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
const (
usage = `Open Container Initiative contrib/cmd/recvtty
recvtty is a reference implementation of a consumer of runC's --console-socket
API. It has two main modes of operation:
* single: Only permit one terminal to be sent to the socket, which is
then hooked up to the stdio of the recvtty process. This is useful
for rudimentary shell management of a container.
* null: Permit any number of terminals to be sent to the socket, but
read them all to /dev/null. This is used for testing, and imitates the
old runC API's --console=/dev/pts/ptmx hack which would allow for a
similar trick. This is probably not what you want to use, unless
you're doing something like our bats integration tests.
To use recvtty, just specify a socket path at which you want to receive
terminals:
$ recvtty [--mode <single|null>] socket.sock
`
)
func bail(err error) {
fmt.Fprintf(os.Stderr, "[recvtty] fatal error: %v\n", err)
os.Exit(1)
}
func handleSingle(path string) error {
// Open a socket.
ln, err := net.Listen("unix", path)
if err != nil {
return err
}
defer ln.Close()
// We only accept a single connection, since we can only really have
// one reader for os.Stdin. Plus this is all a PoC.
conn, err := ln.Accept()
if err != nil {
return err
}
defer conn.Close()
// Close ln, to allow for other instances to take over.
ln.Close()
// Get the fd of the connection.
unixconn, ok := conn.(*net.UnixConn)
if !ok {
return fmt.Errorf("failed to cast to unixconn")
}
socket, err := unixconn.File()
if err != nil {
return err
}
defer socket.Close()
// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
if err != nil {
return err
}
c, err := console.ConsoleFromFile(master)
if err != nil {
return err
}
console.ClearONLCR(c.Fd())
// Copy from our stdio to the master fd.
quitChan := make(chan struct{})
go func() {
io.Copy(os.Stdout, c)
quitChan <- struct{}{}
}()
go func() {
io.Copy(c, os.Stdin)
quitChan <- struct{}{}
}()
// Only close the master fd once we've stopped copying.
<-quitChan
c.Close()
return nil
}
func handleNull(path string) error {
// Open a socket.
ln, err := net.Listen("unix", path)
if err != nil {
return err
}
defer ln.Close()
// As opposed to handleSingle we accept as many connections as we get, but
// we don't interact with Stdin at all (and we copy stdout to /dev/null).
for {
conn, err := ln.Accept()
if err != nil {
return err
}
go func(conn net.Conn) {
// Don't leave references lying around.
defer conn.Close()
// Get the fd of the connection.
unixconn, ok := conn.(*net.UnixConn)
if !ok {
return
}
socket, err := unixconn.File()
if err != nil {
return
}
defer socket.Close()
// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
if err != nil {
return
}
// Just do a dumb copy to /dev/null.
devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
// TODO: Handle this nicely.
return
}
io.Copy(devnull, master)
devnull.Close()
}(conn)
}
}
func main() {
app := cli.NewApp()
app.Name = "recvtty"
app.Usage = usage
// Set version to be the same as runC.
var v []string
if version != "" {
v = append(v, version)
}
if gitCommit != "" {
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
}
app.Version = strings.Join(v, "\n")
// Set the flags.
app.Flags = []cli.Flag{
cli.StringFlag{
Name: "mode, m",
Value: "single",
Usage: "Mode of operation (single or null)",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "Path to write daemon process ID to",
},
}
app.Action = func(ctx *cli.Context) error {
args := ctx.Args()
if len(args) != 1 {
return fmt.Errorf("need to specify a single socket path")
}
path := ctx.Args()[0]
pidPath := ctx.String("pid-file")
if pidPath != "" {
pid := fmt.Sprintf("%d\n", os.Getpid())
if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
return err
}
}
switch ctx.String("mode") {
case "single":
if err := handleSingle(path); err != nil {
return err
}
case "null":
if err := handleNull(path); err != nil {
return err
}
default:
return fmt.Errorf("need to select a valid mode: %s", ctx.String("mode"))
}
return nil
}
if err := app.Run(os.Args); err != nil {
bail(err)
}
}

805
vendor/github.com/opencontainers/runc/contrib/completions/bash/runc generated vendored Normal file

@ -0,0 +1,805 @@
#!/bin/bash
#
# bash completion file for runc command
#
# This script provides completion of:
# - commands and their options
# - filepaths
#
# To enable the completions either:
# - place this file in /usr/share/bash-completion/completions
# or
# - copy this file to e.g. ~/.runc-completion.sh and add the line
# below to your .bashrc after bash completion features are loaded
# . ~/.runc-completion.sh
#
# Configuration:
#
# Note for developers:
# Please arrange options sorted alphabetically by long name with the short
# options immediately following their corresponding long form.
# This order should be applied to lists, alternatives and code blocks.
__runc_previous_extglob_setting=$(shopt -p extglob)
shopt -s extglob
__runc_list_all() {
COMPREPLY=($(compgen -W "$(runc list -q)" -- $cur))
}
__runc_pos_first_nonflag() {
local argument_flags=$1
local counter=$((${subcommand_pos:-${command_pos}} + 1))
while [ $counter -le $cword ]; do
if [ -n "$argument_flags" ] && eval "case '${words[$counter]}' in $argument_flags) true ;; *) false ;; esac"; then
((counter++))
else
case "${words[$counter]}" in
-*) ;;
*)
break
;;
esac
fi
((counter++))
done
echo $counter
}
# Transforms a multiline list of strings into a single line string
# with the words separated by "|".
# This is used to prepare arguments to __runc_pos_first_nonflag().
__runc_to_alternatives() {
local parts=($1)
local IFS='|'
echo "${parts[*]}"
}
# Transforms a multiline list of options into an extglob pattern
# suitable for use in case statements.
__runc_to_extglob() {
local extglob=$(__runc_to_alternatives "$1")
echo "@($extglob)"
}
# Subcommand processing.
# Locates the first occurrence of any of the subcommands contained in the
# first argument. In case of a match, calls the corresponding completion
# function and returns 0.
# If no match is found, 1 is returned. The calling function can then
# continue processing its completion.
#
# TODO if the preceding command has options that accept arguments and an
# argument is equal to one of the subcommands, this is falsely detected as
# a match.
__runc_subcommands() {
local subcommands="$1"
local counter=$(($command_pos + 1))
while [ $counter -lt $cword ]; do
case "${words[$counter]}" in
$(__runc_to_extglob "$subcommands"))
subcommand_pos=$counter
local subcommand=${words[$counter]}
local completions_func=_runc_${command}_${subcommand}
declare -F $completions_func >/dev/null && $completions_func
return 0
;;
esac
((counter++))
done
return 1
}
# List all Signals
__runc_list_signals() {
COMPREPLY=($(compgen -W "$(for i in $(kill -l | xargs); do echo $i; done | grep SIG)"))
}
# suppress trailing whitespace
__runc_nospace() {
# compopt is not available in ancient bash versions
type compopt &>/dev/null && compopt -o nospace
}
# The list of capabilities is defined in types.go, ALL was added manually.
__runc_complete_capabilities() {
COMPREPLY=($(compgen -W "
ALL
AUDIT_CONTROL
AUDIT_WRITE
AUDIT_READ
BLOCK_SUSPEND
CHOWN
DAC_OVERRIDE
DAC_READ_SEARCH
FOWNER
FSETID
IPC_LOCK
IPC_OWNER
KILL
LEASE
LINUX_IMMUTABLE
MAC_ADMIN
MAC_OVERRIDE
MKNOD
NET_ADMIN
NET_BIND_SERVICE
NET_BROADCAST
NET_RAW
SETFCAP
SETGID
SETPCAP
SETUID
SYS_ADMIN
SYS_BOOT
SYS_CHROOT
SYSLOG
SYS_MODULE
SYS_NICE
SYS_PACCT
SYS_PTRACE
SYS_RAWIO
SYS_RESOURCE
SYS_TIME
SYS_TTY_CONFIG
WAKE_ALARM
" -- "$cur"))
}
_runc_exec() {
local boolean_options="
--help
--no-new-privs
--tty, -t
--detach, -d
"
local options_with_args="
--console
--cwd
--env, -e
--user, -u
--process, -p
--pid-file
--process-label
--apparmor
--cap, -c
"
local all_options="$options_with_args $boolean_options"
case "$prev" in
--cap | -c)
__runc_complete_capabilities
return
;;
--console | --cwd | --process | --apparmor)
case "$cur" in
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
/*)
_filedir
__runc_nospace
;;
esac
return
;;
--env | -e)
COMPREPLY=($(compgen -e -- "$cur"))
__runc_nospace
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$all_options" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
# global options that may appear after the runc command
_runc_runc() {
local boolean_options="
$global_boolean_options
--help
--version -v
--debug
"
local options_with_args="
--log
--log-format
--root
--criu
"
case "$prev" in
--log | --root | --criu)
case "$cur" in
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
*)
_filedir
__runc_nospace
;;
esac
return
;;
--log-format)
COMPREPLY=($(compgen -W 'text json' -- "$cur"))
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
if [ $cword -eq $counter ]; then
COMPREPLY=($(compgen -W "${commands[*]} help" -- "$cur"))
fi
;;
esac
}
_runc_pause() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_ps() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_delete() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_kill() {
local boolean_options="
--help
-h
--all
-a
"
case "$prev" in
"kill")
__runc_list_all
return
;;
*)
__runc_list_signals
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_events() {
local boolean_options="
--help
--stats
"
local options_with_args="
--interval
"
case "$prev" in
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_list() {
local boolean_options="
--help
--quiet
-q
"
local options_with_args="
--format
-f
"
case "$prev" in
--format | -f)
COMPREPLY=($(compgen -W 'text json' -- "$cur"))
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
;;
esac
}
_runc_spec() {
local boolean_options="
--help
"
local options_with_args="
--bundle
-b
"
case "$prev" in
--bundle | -b)
case "$cur" in
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
/*)
_filedir
__runc_nospace
;;
esac
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
;;
esac
}
_runc_run() {
local boolean_options="
--help
--detach
-d
--no-subreaper
--no-pivot
--no-new-keyring
"
local options_with_args="
--bundle
-b
--console
--pid-file
"
case "$prev" in
--bundle | -b | --console | --pid-file)
case "$cur" in
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
/*)
_filedir
__runc_nospace
;;
esac
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_checkpoint() {
local boolean_options="
--help
-h
--leave-running
--tcp-established
--ext-unix-sk
--shell-job
--file-locks
"
local options_with_args="
--image-path
--work-path
--page-server
--manage-cgroups-mode
"
case "$prev" in
--page-server) ;;
--manage-cgroups-mode)
COMPREPLY=($(compgen -W "soft full strict" -- "$cur"))
return
;;
--image-path | --work-path)
case "$cur" in
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
*)
_filedir
__runc_nospace
;;
esac
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_create() {
local boolean_options="
--help
--no-pivot
--no-new-keyring
"
local options_with_args="
--bundle
-b
--console
--pid-file
"
case "$prev" in
--bundle | -b | --console | --pid-file)
case "$cur" in
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
/*)
_filedir
__runc_nospace
;;
esac
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_help() {
local counter=$(__runc_pos_first_nonflag)
if [ $cword -eq $counter ]; then
COMPREPLY=($(compgen -W "${commands[*]}" -- "$cur"))
fi
}
_runc_restore() {
local boolean_options="
--help
--tcp-established
--ext-unix-sk
--shell-job
--file-locks
--detach
-d
--no-subreaper
--no-pivot
"
local options_with_args="
-b
--bundle
--image-path
--work-path
--manage-cgroups-mode
--pid-file
"
local all_options="$options_with_args $boolean_options"
case "$prev" in
--manage-cgroups-mode)
COMPREPLY=($(compgen -W "soft full strict" -- "$cur"))
return
;;
--pid-file | --image-path | --work-path | --bundle | -b)
case "$cur" in
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
'')
COMPREPLY=($(compgen -W '/' -- "$cur"))
__runc_nospace
;;
/*)
_filedir
__runc_nospace
;;
esac
return
;;
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$all_options" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_resume() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_state() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_start() {
local boolean_options="
--help
-h
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc_update() {
local boolean_options="
--help
"
local options_with_args="
--blkio-weight
--cpu-period
--cpu-quota
--cpu-rt-period
--cpu-rt-runtime
--cpu-share
--cpuset-cpus
--cpuset-mems
--kernel-memory
--kernel-memory-tcp
--memory
--memory-reservation
--memory-swap
"
case "$prev" in
$(__runc_to_extglob "$options_with_args"))
return
;;
esac
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__runc_list_all
;;
esac
}
_runc() {
local previous_extglob_setting=$(shopt -p extglob)
shopt -s extglob
local commands=(
checkpoint
create
delete
events
exec
init
kill
list
pause
ps
restore
resume
run
spec
start
state
update
help
h
)
# These options are valid as global options for all client commands
# and valid as command options for `runc daemon`
local global_boolean_options="
--help -h
--version -v
"
COMPREPLY=()
local cur prev words cword
_get_comp_words_by_ref -n : cur prev words cword
local command='runc' command_pos=0 subcommand_pos
local counter=1
while [ $counter -lt $cword ]; do
case "${words[$counter]}" in
-*) ;;
=)
((counter++))
;;
*)
command="${words[$counter]}"
command_pos=$counter
break
;;
esac
((counter++))
done
local completions_func=_runc_${command}
declare -F $completions_func >/dev/null && $completions_func
eval "$previous_extglob_setting"
return 0
}
eval "$__runc_previous_extglob_setting"
unset __runc_previous_extglob_setting
complete -F _runc runc

74
vendor/github.com/opencontainers/runc/create.go generated vendored Normal file

@ -0,0 +1,74 @@
package main
import (
"os"
"github.com/urfave/cli"
)
var createCommand = cli.Command{
Name: "create",
Usage: "create a container",
ArgsUsage: `<container-id>
Where "<container-id>" is your name for the instance of the container that you
are starting. The name you provide for the container instance must be unique on
your host.`,
Description: `The create command creates an instance of a container for a bundle. The bundle
is a directory with a specification file named "` + specConfig + `" and a root
filesystem.
The specification file includes an args parameter. The args parameter is used
to specify command(s) that get run when the container is started. To change the
command(s) that get executed on start, edit the args parameter of the spec. See
"runc spec --help" for more explanation.`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "bundle, b",
Value: "",
Usage: `path to the root of the bundle directory, defaults to the current directory`,
},
cli.StringFlag{
Name: "console-socket",
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.BoolFlag{
Name: "no-pivot",
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
},
cli.BoolFlag{
Name: "no-new-keyring",
Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key",
},
cli.IntFlag{
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
if err := revisePidFile(context); err != nil {
return err
}
spec, err := setupSpec(context)
if err != nil {
return err
}
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
if err != nil {
return err
}
// exit with the container's exit status so any external supervisor is
// notified of the exit with the correct exit status.
os.Exit(status)
return nil
},
}

90
vendor/github.com/opencontainers/runc/delete.go generated vendored Normal file

@ -0,0 +1,90 @@
// +build !solaris
package main
import (
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"github.com/opencontainers/runc/libcontainer"
"github.com/urfave/cli"
"golang.org/x/sys/unix"
)
func killContainer(container libcontainer.Container) error {
_ = container.Signal(unix.SIGKILL, false)
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
if err := container.Signal(syscall.Signal(0), false); err != nil {
destroy(container)
return nil
}
}
return fmt.Errorf("container init still running")
}
var deleteCommand = cli.Command{
Name: "delete",
Usage: "delete any resources held by the container often used with detached container",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container.
EXAMPLE:
For example, if the container id is "ubuntu01" and runc list currently shows the
status of "ubuntu01" as "stopped" the following will delete resources held for
"ubuntu01" removing "ubuntu01" from the runc list of containers:
# runc delete ubuntu01`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "force, f",
Usage: "Forcibly deletes the container if it is still running (uses SIGKILL)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
id := context.Args().First()
force := context.Bool("force")
container, err := getContainer(context)
if err != nil {
if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
// if there was an aborted start or something of the sort then the container's directory could exist but
// libcontainer does not see it because the state.json file inside that directory was never created.
path := filepath.Join(context.GlobalString("root"), id)
if e := os.RemoveAll(path); e != nil {
fmt.Fprintf(os.Stderr, "remove %s: %v\n", path, e)
}
if force {
return nil
}
}
return err
}
s, err := container.Status()
if err != nil {
return err
}
switch s {
case libcontainer.Stopped:
destroy(container)
case libcontainer.Created:
return killContainer(container)
default:
if force {
return killContainer(container)
} else {
return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
}
}
return nil
},
}

295
vendor/github.com/opencontainers/runc/events.go generated vendored Normal file

@ -0,0 +1,295 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"sync"
"time"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
// event struct for encoding the event data to json.
type event struct {
Type string `json:"type"`
ID string `json:"id"`
Data interface{} `json:"data,omitempty"`
}
// stats is the runc specific stats structure for stability when encoding and decoding stats.
type stats struct {
CPU cpu `json:"cpu"`
Memory memory `json:"memory"`
Pids pids `json:"pids"`
Blkio blkio `json:"blkio"`
Hugetlb map[string]hugetlb `json:"hugetlb"`
IntelRdt intelRdt `json:"intel_rdt"`
}
type hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type blkioEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}
type blkio struct {
IoServiceBytesRecursive []blkioEntry `json:"ioServiceBytesRecursive,omitempty"`
IoServicedRecursive []blkioEntry `json:"ioServicedRecursive,omitempty"`
IoQueuedRecursive []blkioEntry `json:"ioQueueRecursive,omitempty"`
IoServiceTimeRecursive []blkioEntry `json:"ioServiceTimeRecursive,omitempty"`
IoWaitTimeRecursive []blkioEntry `json:"ioWaitTimeRecursive,omitempty"`
IoMergedRecursive []blkioEntry `json:"ioMergedRecursive,omitempty"`
IoTimeRecursive []blkioEntry `json:"ioTimeRecursive,omitempty"`
SectorsRecursive []blkioEntry `json:"sectorsRecursive,omitempty"`
}
type pids struct {
Current uint64 `json:"current,omitempty"`
Limit uint64 `json:"limit,omitempty"`
}
type throttling struct {
Periods uint64 `json:"periods,omitempty"`
ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"`
ThrottledTime uint64 `json:"throttledTime,omitempty"`
}
type cpuUsage struct {
// Units: nanoseconds.
Total uint64 `json:"total,omitempty"`
Percpu []uint64 `json:"percpu,omitempty"`
Kernel uint64 `json:"kernel"`
User uint64 `json:"user"`
}
type cpu struct {
Usage cpuUsage `json:"usage,omitempty"`
Throttling throttling `json:"throttling,omitempty"`
}
type memoryEntry struct {
Limit uint64 `json:"limit"`
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type memory struct {
Cache uint64 `json:"cache,omitempty"`
Usage memoryEntry `json:"usage,omitempty"`
Swap memoryEntry `json:"swap,omitempty"`
Kernel memoryEntry `json:"kernel,omitempty"`
KernelTCP memoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
}
type l3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type intelRdt struct {
// The read-only L3 cache information
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
var eventsCommand = cli.Command{
Name: "events",
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
ArgsUsage: `<container-id>
Where "<container-id>" is the name for the instance of the container.`,
Description: `The events command displays information about the container. By default the
information is displayed once every 5 seconds.`,
Flags: []cli.Flag{
cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"},
cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
duration := context.Duration("interval")
if duration <= 0 {
return fmt.Errorf("duration interval must be greater than 0")
}
status, err := container.Status()
if err != nil {
return err
}
if status == libcontainer.Stopped {
return fmt.Errorf("container with id %s is not running", container.ID())
}
var (
stats = make(chan *libcontainer.Stats, 1)
events = make(chan *event, 1024)
group = &sync.WaitGroup{}
)
group.Add(1)
go func() {
defer group.Done()
enc := json.NewEncoder(os.Stdout)
for e := range events {
if err := enc.Encode(e); err != nil {
logrus.Error(err)
}
}
}()
if context.Bool("stats") {
s, err := container.Stats()
if err != nil {
return err
}
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
close(events)
group.Wait()
return nil
}
go func() {
for range time.Tick(context.Duration("interval")) {
s, err := container.Stats()
if err != nil {
logrus.Error(err)
continue
}
stats <- s
}
}()
n, err := container.NotifyOOM()
if err != nil {
return err
}
for {
select {
case _, ok := <-n:
if ok {
// this means an oom event was received, if it is !ok then
// the channel was closed because the container stopped and
// the cgroups no longer exist.
events <- &event{Type: "oom", ID: container.ID()}
} else {
n = nil
}
case s := <-stats:
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
}
if n == nil {
close(events)
break
}
}
group.Wait()
return nil
},
}
func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
cg := ls.CgroupStats
if cg == nil {
return nil
}
var s stats
s.Pids.Current = cg.PidsStats.Current
s.Pids.Limit = cg.PidsStats.Limit
s.CPU.Usage.Kernel = cg.CpuStats.CpuUsage.UsageInKernelmode
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
s.Memory.Cache = cg.MemoryStats.Cache
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
s.Blkio.IoQueuedRecursive = convertBlkioEntry(cg.BlkioStats.IoQueuedRecursive)
s.Blkio.IoServiceTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceTimeRecursive)
s.Blkio.IoWaitTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoWaitTimeRecursive)
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
s.Hugetlb = make(map[string]hugetlb)
for k, v := range cg.HugetlbStats {
s.Hugetlb[k] = convertHugtlb(v)
}
if is := ls.IntelRdtStats; is != nil {
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
}
return &s
}
func convertHugtlb(c cgroups.HugetlbStats) hugetlb {
return hugetlb{
Usage: c.Usage,
Max: c.MaxUsage,
Failcnt: c.Failcnt,
}
}
func convertMemoryEntry(c cgroups.MemoryData) memoryEntry {
return memoryEntry{
Limit: c.Limit,
Usage: c.Usage,
Max: c.MaxUsage,
Failcnt: c.Failcnt,
}
}
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
var out []blkioEntry
for _, e := range c {
out = append(out, blkioEntry{
Major: e.Major,
Minor: e.Minor,
Op: e.Op,
Value: e.Value,
})
}
return out
}
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
return &l3CacheInfo{
CbmMask: i.CbmMask,
MinCbmBits: i.MinCbmBits,
NumClosids: i.NumClosids,
}
}

222
vendor/github.com/opencontainers/runc/exec.go generated vendored Normal file
View file

@ -0,0 +1,222 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
var execCommand = cli.Command{
Name: "exec",
Usage: "execute new process inside the container",
ArgsUsage: `<container-id> <command> [command options] || -p process.json <container-id>
Where "<container-id>" is the name for the instance of the container and
"<command>" is the command to be executed in the container.
"<command>" can't be empty unless a "-p" flag provided.
EXAMPLE:
For example, if the container is configured to run the linux ps command the
following will output a list of processes running in the container:
# runc exec <container-id> ps`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "console-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "cwd",
Usage: "current working directory in the container",
},
cli.StringSliceFlag{
Name: "env, e",
Usage: "set environment variables",
},
cli.BoolFlag{
Name: "tty, t",
Usage: "allocate a pseudo-TTY",
},
cli.StringFlag{
Name: "user, u",
Usage: "UID (format: <uid>[:<gid>])",
},
cli.Int64SliceFlag{
Name: "additional-gids, g",
Usage: "additional gids",
},
cli.StringFlag{
Name: "process, p",
Usage: "path to the process.json",
},
cli.BoolFlag{
Name: "detach,d",
Usage: "detach from the container's process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "specify the file to write the process id to",
},
cli.StringFlag{
Name: "process-label",
Usage: "set the asm process label for the process commonly used with selinux",
},
cli.StringFlag{
Name: "apparmor",
Usage: "set the apparmor profile for the process",
},
cli.BoolFlag{
Name: "no-new-privs",
Usage: "set the no new privileges value for the process",
},
cli.StringSliceFlag{
Name: "cap, c",
Value: &cli.StringSlice{},
Usage: "add a capability to the bounding set for the process",
},
cli.BoolFlag{
Name: "no-subreaper",
Usage: "disable the use of the subreaper used to reap reparented processes",
Hidden: true,
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if err := revisePidFile(context); err != nil {
return err
}
status, err := execProcess(context)
if err == nil {
os.Exit(status)
}
return fmt.Errorf("exec failed: %v", err)
},
SkipArgReorder: true,
}
func execProcess(context *cli.Context) (int, error) {
container, err := getContainer(context)
if err != nil {
return -1, err
}
status, err := container.Status()
if err != nil {
return -1, err
}
if status == libcontainer.Stopped {
return -1, fmt.Errorf("cannot exec a container that has stopped")
}
path := context.String("process")
if path == "" && len(context.Args()) == 1 {
return -1, fmt.Errorf("process args cannot be empty")
}
detach := context.Bool("detach")
state, err := container.State()
if err != nil {
return -1, err
}
bundle := utils.SearchLabels(state.Config.Labels, "bundle")
p, err := getProcess(context, bundle)
if err != nil {
return -1, err
}
r := &runner{
enableSubreaper: false,
shouldDestroy: false,
container: container,
consoleSocket: context.String("console-socket"),
detach: detach,
pidFile: context.String("pid-file"),
action: CT_ACT_RUN,
}
return r.run(p)
}
func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
if path := context.String("process"); path != "" {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
var p specs.Process
if err := json.NewDecoder(f).Decode(&p); err != nil {
return nil, err
}
return &p, validateProcessSpec(&p)
}
// process via cli flags
if err := os.Chdir(bundle); err != nil {
return nil, err
}
spec, err := loadSpec(specConfig)
if err != nil {
return nil, err
}
p := spec.Process
p.Args = context.Args()[1:]
// override the cwd, if passed
if context.String("cwd") != "" {
p.Cwd = context.String("cwd")
}
if ap := context.String("apparmor"); ap != "" {
p.ApparmorProfile = ap
}
if l := context.String("process-label"); l != "" {
p.SelinuxLabel = l
}
if caps := context.StringSlice("cap"); len(caps) > 0 {
for _, c := range caps {
p.Capabilities.Bounding = append(p.Capabilities.Bounding, c)
p.Capabilities.Inheritable = append(p.Capabilities.Inheritable, c)
p.Capabilities.Effective = append(p.Capabilities.Effective, c)
p.Capabilities.Permitted = append(p.Capabilities.Permitted, c)
p.Capabilities.Ambient = append(p.Capabilities.Ambient, c)
}
}
// append the passed env variables
p.Env = append(p.Env, context.StringSlice("env")...)
// set the tty
if context.IsSet("tty") {
p.Terminal = context.Bool("tty")
}
if context.IsSet("no-new-privs") {
p.NoNewPrivileges = context.Bool("no-new-privs")
}
// override the user, if passed
if context.String("user") != "" {
u := strings.SplitN(context.String("user"), ":", 2)
if len(u) > 1 {
gid, err := strconv.Atoi(u[1])
if err != nil {
return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
}
p.User.GID = uint32(gid)
}
uid, err := strconv.Atoi(u[0])
if err != nil {
return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
}
p.User.UID = uint32(uid)
}
for _, gid := range context.Int64Slice("additional-gids") {
if gid < 0 {
return nil, fmt.Errorf("additional-gids must be a positive number %d", gid)
}
p.User.AdditionalGids = append(p.User.AdditionalGids, uint32(gid))
}
return p, nil
}

31
vendor/github.com/opencontainers/runc/init.go generated vendored Normal file
View file

@ -0,0 +1,31 @@
package main
import (
"os"
"runtime"
"github.com/opencontainers/runc/libcontainer"
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/urfave/cli"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
}
}
var initCommand = cli.Command{
Name: "init",
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
Action: func(context *cli.Context) error {
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
// as the error is sent back to the parent there is no need to log
// or write it to stderr because the parent process will handle this
os.Exit(1)
}
panic("libcontainer: container init failed to exec")
},
}

71
vendor/github.com/opencontainers/runc/kill.go generated vendored Normal file
View file

@ -0,0 +1,71 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"syscall"
"github.com/urfave/cli"
)
var killCommand = cli.Command{
Name: "kill",
Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process",
ArgsUsage: `<container-id> [signal]
Where "<container-id>" is the name for the instance of the container and
"[signal]" is the signal to be sent to the init process.
EXAMPLE:
For example, if the container id is "ubuntu01" the following will send a "KILL"
signal to the init process of the "ubuntu01" container:
# runc kill ubuntu01 KILL`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "all, a",
Usage: "send the specified signal to all processes inside the container",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if err := checkArgs(context, 2, maxArgs); err != nil {
return err
}
container, err := getContainer(context)
if err != nil {
return err
}
sigstr := context.Args().Get(1)
if sigstr == "" {
sigstr = "SIGTERM"
}
signal, err := parseSignal(sigstr)
if err != nil {
return err
}
if err := container.Signal(signal, context.Bool("all")); err != nil {
return err
}
return nil
},
}
func parseSignal(rawSignal string) (syscall.Signal, error) {
s, err := strconv.Atoi(rawSignal)
if err == nil {
return syscall.Signal(s), nil
}
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
if !ok {
return -1, fmt.Errorf("unknown signal %q", rawSignal)
}
return signal, nil
}

View file

@ -0,0 +1,328 @@
# libcontainer
[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls.
It allows you to manage the lifecycle of the container, performing additional operations
after the container is created.
#### Container
A container is a self-contained execution environment that shares the kernel of the
host system and is (optionally) isolated from other containers in the system.
#### Using libcontainer
Because containers are spawned in a two-step process, you will need a binary that
will be executed as the init process for the container. In libcontainer, we use
the current binary (/proc/self/exe) as the init process and pass it the argument
"init". We call this first step "bootstrap", so you always need an "init"
function as the entry point of the bootstrap.
In addition to the Go init function, the early-stage bootstrap is handled by importing
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
```go
import (
_ "github.com/opencontainers/runc/libcontainer/nsenter"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
logrus.Fatal(err)
}
panic("--this line should have never been executed, congratulations--")
}
}
```
Then to create a container you first have to initialize an instance of a factory
that will handle the creation and initialization for a container.
```go
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
if err != nil {
logrus.Fatal(err)
return
}
```
Once you have an instance of the factory, you can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
config := &configs.Config{
Rootfs: "/your/path/to/rootfs",
Capabilities: &configs.Capabilities{
Bounding: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Effective: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Inheritable: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Permitted: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Ambient: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWUSER},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
},
},
MaskPaths: []string{
"/proc/kcore",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{
Source: "proc",
Destination: "/proc",
Device: "proc",
Flags: defaultMountFlags,
},
{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | unix.MS_RDONLY,
},
},
UidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
GidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: unix.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},
},
}
```
Once you have the configuration populated you can create a container:
```go
container, err := factory.Create("container-id", config)
if err != nil {
logrus.Fatal(err)
return
}
```
To spawn bash as the initial process inside the container and have the
process's pid returned in order to wait, signal, or kill the process:
```go
process := &libcontainer.Process{
Args: []string{"/bin/bash"},
Env: []string{"PATH=/bin"},
User: "daemon",
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
err := container.Run(process)
if err != nil {
container.Destroy()
logrus.Fatal(err)
return
}
// wait for the process to finish.
_, err = process.Wait()
if err != nil {
logrus.Fatal(err)
}
// destroy the container.
container.Destroy()
```
Additional ways to interact with a running container are:
```go
// return all the pids for all processes running inside the container.
processes, err := container.Processes()
// get detailed cpu, memory, io, and network statistics for the container and
// its processes.
stats, err := container.Stats()
// pause all processes inside the container.
container.Pause()
// resume all paused processes.
container.Resume()
// send signal to container's init process.
container.Signal(signal)
// update container resource constraints.
container.Set(config)
// get current status of the container.
status, err := container.Status()
// get current container's state information.
state, err := container.State()
```
#### Checkpoint & Restore
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
This lets you save the state of a process running inside a container to disk, and then restore
that state into a new process, on the same machine or on another machine.
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
If you don't already have `criu` installed, you can build it from source, following the
[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
generated when building libcontainer with docker.
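As a rough sketch of how the checkpoint/restore API can be used (the image directory path below is an illustrative assumption and error handling is simplified):
```go
// Sketch: checkpoint a running container to an images directory, then
// restore the saved state into a new process. ImagesDirectory and
// LeaveRunning are CriuOpts fields; the path is only an example.
opts := &libcontainer.CriuOpts{
	ImagesDirectory: "/tmp/criu-images", // where criu writes/reads its image files
	LeaveRunning:    false,              // stop the container after the checkpoint
}
if err := container.Checkpoint(opts); err != nil {
	logrus.Fatal(err)
}

// Later, possibly on another machine, restore into a fresh process.
if err := container.Restore(process, opts); err != nil {
	logrus.Fatal(err)
}
```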
## Copyright and license
Code and documentation copyright 2014 Docker, Inc. Code released under the Apache 2.0 license.
Docs released under Creative Commons.

View file

@ -0,0 +1,418 @@
## Container Specification - v1
This is the standard configuration for version 1 containers. It includes
namespaces, standard filesystem setup, a default Linux capability set, and
information about resource reservations. It also has information about any
populated environment settings for the processes running inside a container.
Along with the configuration of how a container is created the standard also
discusses actions that can be performed on a container to manage and inspect
information about the processes running inside.
The v1 profile is meant to be able to accommodate the majority of applications
with a strong security configuration.
### System Requirements and Compatibility
Minimum requirements:
* Kernel version - 3.10 recommended; 2.6.2x minimum (with backported patches)
* Mounted cgroups with each subsystem in its own hierarchy
### Namespaces
| Flag | Enabled |
| ------------ | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 1 |
Namespaces are created for the container via the `clone` syscall.
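For illustration only, this is roughly what requesting those namespaces looks like when spawning a process from Go; the flag names come from the standard `syscall` package, and the command is a placeholder rather than the runtime's actual implementation:
```go
// Sketch: spawn a child process in a fresh set of namespaces.
cmd := exec.Command("/proc/self/exe", "init")
cmd.SysProcAttr = &syscall.SysProcAttr{
	Cloneflags: syscall.CLONE_NEWPID | syscall.CLONE_NEWUTS |
		syscall.CLONE_NEWIPC | syscall.CLONE_NEWNET |
		syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER,
}
if err := cmd.Run(); err != nil {
	// handle the error
}
```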
### Filesystem
A root filesystem must be provided to a container for execution. The container
will use this root filesystem (rootfs) to jail and spawn processes inside where
the binaries and system libraries are local to that directory. Any binaries
to be executed must be contained within this rootfs.
Mounts that happen inside the container are automatically cleaned up when the
container exits, as the mount namespace is destroyed and the kernel will
unmount all the mounts that were set up within that namespace.
For a container to execute properly there are certain filesystems that
are required to be mounted within the rootfs that the runtime will set up.
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
After a container's filesystems are mounted within the newly created
mount namespace, `/dev` will need to be populated with a set of device nodes.
It is expected that a rootfs does not need to have any device nodes specified
for `/dev` within the rootfs as the container will set up the correct devices
that are required for executing a container's process.
| Path | Mode | Access |
| ------------ | ---- | ---------- |
| /dev/null | 0666 | rwm |
| /dev/zero | 0666 | rwm |
| /dev/full | 0666 | rwm |
| /dev/tty | 0666 | rwm |
| /dev/random | 0666 | rwm |
| /dev/urandom | 0666 | rwm |
**ptmx**
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
the container.
The use of a pseudo TTY is optional within a container and it should support both.
If a pseudo TTY is provided to the container, `/dev/console` will need to be
set up by binding the console in `/dev/` after it has been populated and mounted
in tmpfs.
| Source | Destination | UID GID | Mode | Type |
| --------------- | ------------ | ------- | ---- | ---- |
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
After `/dev/null` has been set up, we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container, we close and `dup2` the `/dev/null`
that is local to the container's rootfs.
After the container has `/proc` mounted, a few standard symlinks are set up
within `/dev/` for the io.
| Source | Destination |
| --------------- | ----------- |
| /proc/self/fd | /dev/fd |
| /proc/self/fd/0 | /dev/stdin |
| /proc/self/fd/1 | /dev/stdout |
| /proc/self/fd/2 | /dev/stderr |
A `pivot_root` is used to change the root for the process, effectively
jailing the process inside the rootfs.
```c
put_old = mkdir(...);
pivot_root(rootfs, put_old);
chdir("/");
unmount(put_old, MS_DETACH);
rmdir(put_old);
```
For containers running with a rootfs inside `ramfs`, a `MS_MOVE` combined
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
```c
mount(rootfs, "/", NULL, MS_MOVE, NULL);
chroot(".");
chdir("/");
```
The `umask` is set back to `0022` after the filesystem setup has been completed.
### Resources
Cgroups are used to handle resource allocation for containers. This includes
system resources like cpu, memory, and device access.
| Subsystem | Enabled |
| ---------- | ------- |
| devices | 1 |
| memory | 1 |
| cpu | 1 |
| cpuacct | 1 |
| cpuset | 1 |
| blkio | 1 |
| perf_event | 1 |
| freezer | 1 |
| hugetlb | 1 |
| pids | 1 |
All cgroup subsystems are joined so that statistics can be collected from
each of the subsystems. Freezer does not expose any stats but is joined
so that containers can be paused and resumed.
The parent process of the container's init must place the init pid inside
the correct cgroups before the initialization begins. This is done so
that no processes or threads escape the cgroups. This sync is
done via a pipe (specified in the runtime section below) that the container's
init process blocks on, waiting for the parent to finish setup.
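Joining a cgroup ultimately comes down to writing the pid into that hierarchy's `cgroup.procs` file; a minimal sketch (the path and `pid` below are placeholders):
```go
// Sketch: add a pid to an already-created cgroup by writing it to
// cgroup.procs. The hierarchy path is an illustrative placeholder.
procs := "/sys/fs/cgroup/devices/mycontainer/cgroup.procs"
if err := ioutil.WriteFile(procs, []byte(strconv.Itoa(pid)), 0700); err != nil {
	// handle the error
}
```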
### IntelRdt
Intel platforms with newer Xeon CPUs support Intel Resource Director Technology
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
currently supports L3 cache resource allocation.
This feature provides a way for the software to restrict cache allocation to a
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each CLOS
has a capacity bitmask (CBM).
It can be used to handle L3 cache resource allocation for containers if the
hardware and kernel support Intel RDT/CAT.
In Linux kernel 4.10 or newer, the interface is defined and exposed via the
"resource control" filesystem, which is a "cgroup-like" interface.
Compared with cgroups, it has a similar process management lifecycle and
interfaces in a container. But unlike the cgroups hierarchy, it has a
single-level filesystem layout.
Intel RDT "resource control" filesystem hierarchy:
```
mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
| |-- L3
| |-- cbm_mask
| |-- min_cbm_bits
| |-- num_closids
|-- cpus
|-- schemata
|-- tasks
|-- <container_id>
|-- cpus
|-- schemata
|-- tasks
```
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
resource constraints.
The file `tasks` has a list of tasks that belong to this group (e.g., the
"<container_id>" group). Tasks can be added to a group by writing the task ID
to the "tasks" file (which will automatically remove them from the previous
group to which they belonged). New tasks created by fork(2) and clone(2) are
added to the same group as their parent. If a pid is not in any sub-group, it
is in the root group.
The file `schemata` has allocation masks/values for L3 cache on each socket,
which contains L3 cache id and capacity bitmask (CBM).
```
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
```
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`,
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that
can be set is less than the maximum bit. The maximum number of bits in the CBM
varies among supported Intel Xeon platforms. In the Intel RDT "resource control"
filesystem layout, the CBM in a group should be a subset of the CBM in root, and
the kernel checks its validity when writing. For example, 0xfffff in root
indicates the maximum CBM is 20 bits, which maps to the entire L3 cache capacity.
Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
For more information about Intel RDT/CAT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
An example for runc:
```
Consider a two-socket machine with two L3 caches where the default CBM is
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
inside the container only have access to the "upper" 80% of L3 cache id 0 and
the "lower" 50% L3 cache id 1:
"linux": {
"intelRdt": {
"l3CacheSchema": "L3:0=ffff0;1=3ff"
}
}
```
### Security
The standard set of Linux capabilities that are set in a container
provides a good default for security and flexibility for the applications.
| Capability | Enabled |
| -------------------- | ------- |
| CAP_NET_RAW | 1 |
| CAP_NET_BIND_SERVICE | 1 |
| CAP_AUDIT_READ | 1 |
| CAP_AUDIT_WRITE | 1 |
| CAP_DAC_OVERRIDE | 1 |
| CAP_SETFCAP | 1 |
| CAP_SETPCAP | 1 |
| CAP_SETGID | 1 |
| CAP_SETUID | 1 |
| CAP_MKNOD | 1 |
| CAP_CHOWN | 1 |
| CAP_FOWNER | 1 |
| CAP_FSETID | 1 |
| CAP_KILL | 1 |
| CAP_SYS_CHROOT | 1 |
| CAP_NET_BROADCAST | 0 |
| CAP_SYS_MODULE | 0 |
| CAP_SYS_RAWIO | 0 |
| CAP_SYS_PACCT | 0 |
| CAP_SYS_ADMIN | 0 |
| CAP_SYS_NICE | 0 |
| CAP_SYS_RESOURCE | 0 |
| CAP_SYS_TIME | 0 |
| CAP_SYS_TTY_CONFIG | 0 |
| CAP_AUDIT_CONTROL | 0 |
| CAP_MAC_OVERRIDE | 0 |
| CAP_MAC_ADMIN | 0 |
| CAP_NET_ADMIN | 0 |
| CAP_SYSLOG | 0 |
| CAP_DAC_READ_SEARCH | 0 |
| CAP_LINUX_IMMUTABLE | 0 |
| CAP_IPC_LOCK | 0 |
| CAP_IPC_OWNER | 0 |
| CAP_SYS_PTRACE | 0 |
| CAP_SYS_BOOT | 0 |
| CAP_LEASE | 0 |
| CAP_WAKE_ALARM | 0 |
| CAP_BLOCK_SUSPEND | 0 |
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
the containers. A container should support setting an apparmor profile or
selinux process and mount labels if provided in the configuration.
Standard apparmor profile:
```c
#include <tunables/global>
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
#include <abstractions/base>
network,
capability,
file,
umount,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/fs/cg[^r]*/** wklx,
deny /sys/firmware/efi/efivars/** rwklx,
deny /sys/kernel/security/** rwklx,
}
```
*TODO: seccomp work is being done to find a good default config*
### Runtime and Init Process
During container creation the parent process needs to talk to the container's init
process and synchronize with it. This is accomplished by creating
a pipe that is passed to the container's init. When the init process first spawns
it will block on its side of the pipe until the parent closes its side. This
gives the parent time to place the new process inside a cgroup hierarchy
and/or write any uid/gid mappings required for user namespaces.
The pipe is passed to the init process via FD 3.
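A minimal sketch of that handshake, under the assumption that the parent passes the pipe via `ExtraFiles` (which are mapped starting at FD 3 in the child); the names and error handling here are illustrative, not libcontainer's actual code:
```go
// Parent side: create a pipe and hand the read end to the child as FD 3.
r, w, err := os.Pipe()
if err != nil {
	logrus.Fatal(err)
}
cmd := exec.Command("/proc/self/exe", "init")
cmd.ExtraFiles = []*os.File{r} // ExtraFiles start at FD 3 in the child
if err := cmd.Start(); err != nil {
	logrus.Fatal(err)
}
// ... place the child's pid into cgroups, write uid/gid mappings ...
w.Close() // closing the write end unblocks the child

// Child side: block until the parent has finished its setup.
pipe := os.NewFile(3, "sync-pipe")
ioutil.ReadAll(pipe) // returns once the parent closes its end
```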
The application consuming libcontainer should be compiled statically. libcontainer
does not define any init process and the arguments provided are used to `exec` the
process inside the application. There should be no long running init within the
container spec.
If a pseudo tty is provided to a container it will open and `dup2` the console
as the container's STDIN, STDOUT, STDERR as well as mounting the console
as `/dev/console`.
An extra set of mounts is provided to a container and set up for use. A container's
rootfs can contain some non-portable files inside that can cause side effects during
execution of a process. These files are usually created and populated with the
container-specific information via the runtime.
**Extra runtime files:**
* /etc/hosts
* /etc/resolv.conf
* /etc/hostname
* /etc/localtime
#### Defaults
There are a few defaults that can be overridden by users; when omitted,
these apply to processes within a container.
| Type | Value |
| ------------------- | ------------------------------ |
| Parent Death Signal | SIGKILL |
| UID | 0 |
| GID | 0 |
| GROUPS | 0, NULL |
| CWD | "/" |
| $HOME | Current user's home dir or "/" |
| Readonly rootfs | false |
| Pseudo TTY | false |
## Actions
After a container is created there is a standard set of actions that can
be done to the container. These actions are part of the public API for
a container.
| Action | Description |
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Waits on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |
| Signal Process | Send a signal to any of the container's processes |
| Pause | Pause all processes inside the container |
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |
| Set | Setup configs of the container after it's created |
### Execute a new process inside of a running container
A user can execute a new process inside of a running container. Any binaries to be
executed must be accessible within the container's rootfs.
The started process will run inside the container's rootfs. Any changes
made by the process to the container's filesystem will persist after the
process finishes executing.
The started process will join all the container's existing namespaces. When the
container is paused, the process will also be paused and will resume when
the container is unpaused. The started process will only run when the container's
primary process (PID 1) is running, and will not be restarted when the container
is restarted.
#### Planned additions
The started process will have its own cgroups nested inside the container's
cgroups. This is used for process tracking and optionally resource allocation
handling for the new process. The freezer cgroup is required; the rest of the cgroups
are optional. The process executor must place its pid inside the correct
cgroups before starting the process. This is done so that no child processes or
threads can escape the cgroups.
When the process is stopped, the process executor will try (in a best-effort way)
to stop all its children and remove the sub-cgroups.

View file

@ -2,14 +2,10 @@
package apparmor
// #cgo LDFLAGS: -lapparmor
// #include <sys/apparmor.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"io/ioutil"
"os"
"unsafe"
)
// IsEnabled returns true if apparmor is enabled for the host.
@ -23,16 +19,36 @@ func IsEnabled() bool {
return false
}
func setprocattr(attr, value string) error {
// Under AppArmor you can only change your own attr, so use /proc/self/
// instead of /proc/<tid>/ like libapparmor does
path := fmt.Sprintf("/proc/self/attr/%s", attr)
f, err := os.OpenFile(path, os.O_WRONLY, 0)
if err != nil {
return err
}
defer f.Close()
_, err = fmt.Fprintf(f, "%s", value)
return err
}
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
func changeOnExec(name string) error {
value := "exec " + name
if err := setprocattr("exec", value); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
}
// ApplyProfile will apply the profile with the specified name to the process after
// the next exec.
func ApplyProfile(name string) error {
if name == "" {
return nil
}
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
if _, err := C.aa_change_onexec(cName); err != nil {
return err
}
return nil
return changeOnExec(name)
}

View file

@ -4,13 +4,13 @@ package libcontainer
import (
"fmt"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
var capabilityMap map[string]capability.Cap
@ -30,40 +30,84 @@ func init() {
}
}
func newCapWhitelist(caps []string) (*whitelist, error) {
l := []capability.Cap{}
for _, c := range caps {
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
bounding := []capability.Cap{}
for _, c := range capConfig.Bounding {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
l = append(l, v)
bounding = append(bounding, v)
}
pid, err := capability.NewPid(os.Getpid())
effective := []capability.Cap{}
for _, c := range capConfig.Effective {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
effective = append(effective, v)
}
inheritable := []capability.Cap{}
for _, c := range capConfig.Inheritable {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
inheritable = append(inheritable, v)
}
permitted := []capability.Cap{}
for _, c := range capConfig.Permitted {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
permitted = append(permitted, v)
}
ambient := []capability.Cap{}
for _, c := range capConfig.Ambient {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
ambient = append(ambient, v)
}
pid, err := capability.NewPid(0)
if err != nil {
return nil, err
}
return &whitelist{
keep: l,
pid: pid,
return &containerCapabilities{
bounding: bounding,
effective: effective,
inheritable: inheritable,
permitted: permitted,
ambient: ambient,
pid: pid,
}, nil
}
type whitelist struct {
pid capability.Capabilities
keep []capability.Cap
type containerCapabilities struct {
pid capability.Capabilities
bounding []capability.Cap
effective []capability.Cap
inheritable []capability.Cap
permitted []capability.Cap
ambient []capability.Cap
}
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
func (w *whitelist) dropBoundingSet() error {
w.pid.Clear(capability.BOUNDS)
w.pid.Set(capability.BOUNDS, w.keep...)
return w.pid.Apply(capability.BOUNDS)
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
func (c *containerCapabilities) ApplyBoundingSet() error {
c.pid.Clear(capability.BOUNDS)
c.pid.Set(capability.BOUNDS, c.bounding...)
return c.pid.Apply(capability.BOUNDS)
}
// drop drops all capabilities for the current process except those specified in the whitelist.
func (w *whitelist) drop() error {
w.pid.Clear(allCapabilityTypes)
w.pid.Set(allCapabilityTypes, w.keep...)
return w.pid.Apply(allCapabilityTypes)
// Apply sets all the capabilities for the current process in the config.
func (c *containerCapabilities) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
c.pid.Set(capability.BOUNDS, c.bounding...)
c.pid.Set(capability.PERMITTED, c.permitted...)
c.pid.Set(capability.INHERITABLE, c.inheritable...)
c.pid.Set(capability.EFFECTIVE, c.effective...)
c.pid.Set(capability.AMBIENT, c.ambient...)
return c.pid.Apply(allCapabilityTypes)
}

View file

@ -27,9 +27,9 @@ type Manager interface {
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Cgroups *configs.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
@ -37,7 +37,7 @@ type Manager interface {
// restore the object later.
GetPaths() map[string]string
// Set the cgroup as configured.
// Sets the cgroup as configured.
Set(container *configs.Config) error
}

View file

@ -0,0 +1,18 @@
// +build linux
package cgroups
import (
"testing"
)
func TestParseCgroups(t *testing.T) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
t.Fatal(err)
}
if _, ok := cgroups["cpu"]; !ok {
t.Fail()
}
}

View file

@ -9,10 +9,8 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
@ -34,7 +32,6 @@ var (
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
@ -107,6 +104,8 @@ func (m *Manager) Apply(pid int) (err error) {
if m.Cgroups == nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
var c = m.Cgroups
@ -115,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
return err
}
m.Paths = make(map[string]string)
if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
_, err := d.path(name)
if err != nil {
@ -125,37 +124,44 @@ func (m *Manager) Apply(pid int) (err error) {
}
return err
}
paths[name] = path
m.Paths[name] = path
}
m.Paths = paths
return cgroups.EnterPid(m.Paths, pid)
}
m.mu.Lock()
defer m.mu.Unlock()
paths := make(map[string]string)
for _, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
}
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
if cgroups.IsNotFound(err) {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
continue
}
return err
}
paths[sys.Name()] = p
m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil {
if os.IsPermission(err) && m.Cgroups.Path == "" {
// If we didn't set a cgroup path, then let's defer the error here
// until we know whether we have set limits or not.
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
// it will have the same limits as its parent.
delete(m.Paths, sys.Name())
continue
}
return err
}
}
m.Paths = paths
return nil
}
func (m *Manager) Destroy() error {
if m.Cgroups.Paths != nil {
if m.Cgroups == nil || m.Cgroups.Paths != nil {
return nil
}
m.mu.Lock()
@ -191,19 +197,20 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
for _, sys := range subsystems {
// Generate fake cgroup data.
d, err := getCgroupData(container.Cgroups, -1)
if err != nil {
return err
}
// Get the path, but don't error out if the cgroup wasn't found.
path, err := d.path(sys.Name())
if err != nil && !cgroups.IsNotFound(err) {
return err
}
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
paths := m.GetPaths()
for _, sys := range subsystems {
path := paths[sys.Name()]
if err := sys.Set(path, container.Cgroups); err != nil {
if path == "" {
// cgroup never applied
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
}
return err
}
}
@ -219,14 +226,8 @@ func (m *Manager) Set(container *configs.Config) error {
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func (m *Manager) Freeze(state configs.FreezerState) error {
d, err := getCgroupData(m.Cgroups, 0)
if err != nil {
return err
}
dir, err := d.path("freezer")
if err != nil {
return err
}
paths := m.GetPaths()
dir := paths["freezer"]
prevState := m.Cgroups.Resources.Freezer
m.Cgroups.Resources.Freezer = state
freezer, err := subsystems.Get("freezer")
@ -242,28 +243,13 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
paths := m.GetPaths()
return cgroups.GetPids(paths["devices"])
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func getCgroupPath(c *configs.Cgroup) (string, error) {
d, err := getCgroupData(c, 0)
if err != nil {
return "", err
}
return d.path("devices")
paths := m.GetPaths()
return cgroups.GetAllPids(paths["devices"])
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
@ -294,25 +280,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
@ -320,11 +289,14 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.innerPath) {
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
@ -340,7 +312,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
return "", err
}
return path, nil
@ -353,8 +325,7 @@ func writeFile(dir, file, data string) error {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
//return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
logrus.Debugf("failed to write %v to %v: %v", data, file, err)
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
@ -374,8 +345,8 @@ func removePath(p string, err error) error {
return nil
}
func CheckCpushares(path string, c int64) error {
var cpuShares int64
func CheckCpushares(path string, c uint64) error {
var cpuShares uint64
if c == 0 {
return nil

View file

@ -0,0 +1,272 @@
// +build linux
package fs
import (
"path/filepath"
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestInvalidCgroupPath(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Path: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
func TestInvalidAbsoluteCgroupPath(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Path: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "../../../../../../../../../../some/path",
Name: "name",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "/../../../../../../../../../../some/path",
Name: "name",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupName(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "parent",
Name: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupName(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "parent",
Name: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidCgroupNameAndParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "../../../../../../../../../../some/path",
Name: "../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
root, err := getCgroupRoot()
if err != nil {
t.Errorf("couldn't get cgroup root: %v", err)
}
config := &configs.Cgroup{
Parent: "/../../../../../../../../../../some/path",
Name: "/../../../../../../../../../../some/path",
}
data, err := getCgroupData(config, 0)
if err != nil {
t.Errorf("couldn't get cgroup data: %v", err)
}
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
if strings.HasPrefix(data.innerPath, "..") {
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
}
// Double-check, using an actual cgroup.
deviceRoot := filepath.Join(root, "devices")
devicePath, err := data.path("devices")
if err != nil {
t.Errorf("couldn't get cgroup path: %v", err)
}
if !strings.HasPrefix(devicePath, deviceRoot) {
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
}
}

View file

@ -0,0 +1,636 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
sectorsRecursiveContents = `8:0 1024`
serviceBytesRecursiveContents = `8:0 Read 100
8:0 Write 200
8:0 Sync 300
8:0 Async 500
8:0 Total 500
Total 500`
servicedRecursiveContents = `8:0 Read 10
8:0 Write 40
8:0 Sync 20
8:0 Async 30
8:0 Total 50
Total 50`
queuedRecursiveContents = `8:0 Read 1
8:0 Write 4
8:0 Sync 2
8:0 Async 3
8:0 Total 5
Total 5`
serviceTimeRecursiveContents = `8:0 Read 173959
8:0 Write 0
8:0 Sync 0
8:0 Async 173959
8:0 Total 17395
Total 17395`
waitTimeRecursiveContents = `8:0 Read 15571
8:0 Write 0
8:0 Sync 0
8:0 Async 15571
8:0 Total 15571`
mergedRecursiveContents = `8:0 Read 5
8:0 Write 10
8:0 Sync 0
8:0 Async 0
8:0 Total 15
Total 15`
timeRecursiveContents = `8:0 8`
throttleServiceBytes = `8:0 Read 11030528
8:0 Write 23
8:0 Sync 42
8:0 Async 11030528
8:0 Total 11030528
252:0 Read 11030528
252:0 Write 23
252:0 Sync 42
252:0 Async 11030528
252:0 Total 11030528
Total 22061056`
throttleServiced = `8:0 Read 164
8:0 Write 23
8:0 Sync 42
8:0 Async 164
8:0 Total 164
252:0 Read 164
252:0 Write 23
252:0 Sync 42
252:0 Async 164
252:0 Total 164
Total 328`
)
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
}
func TestBlkioSetWeight(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightBefore = 100
weightAfter = 200
)
helper.writeFileContents(map[string]string{
"blkio.weight": strconv.Itoa(weightBefore),
})
helper.CgroupData.config.Resources.BlkioWeight = weightAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight")
if err != nil {
t.Fatalf("Failed to parse blkio.weight - %s", err)
}
if value != weightAfter {
t.Fatal("Got the wrong value, set blkio.weight failed.")
}
}
func TestBlkioSetWeightDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightDeviceBefore = "8:0 400"
)
wd := configs.NewWeightDevice(8, 0, 500, 0)
weightDeviceAfter := wd.WeightString()
helper.writeFileContents(map[string]string{
"blkio.weight_device": weightDeviceBefore,
})
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
if err != nil {
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
}
if value != weightDeviceAfter {
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
}
}
// regression #274
func TestBlkioSetMultipleWeightDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightDeviceBefore = "8:0 400"
)
wd1 := configs.NewWeightDevice(8, 0, 500, 0)
wd2 := configs.NewWeightDevice(8, 16, 500, 0)
// We cannot actually set and check both, because a plain ioutil.WriteFile
// to a cgroup file overwrites the whole file content instead of appending
// to it the way the kernel does. Just checking that the second device is
// present suffices to ensure multiple writes are done.
weightDeviceAfter := wd2.WeightString()
helper.writeFileContents(map[string]string{
"blkio.weight_device": weightDeviceBefore,
})
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
if err != nil {
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
}
if value != weightDeviceAfter {
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
}
}
func TestBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioStatsNoSectorsFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServicedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoQueuedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoMergedFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read Write",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestNonCFQBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "",
"blkio.io_serviced_recursive": "",
"blkio.io_queued_recursive": "",
"blkio.sectors_recursive": "",
"blkio.io_service_time_recursive": "",
"blkio.io_wait_time_recursive": "",
"blkio.io_merged_recursive": "",
"blkio.time_recursive": "",
"blkio.throttle.io_service_bytes": throttleServiceBytes,
"blkio.throttle.io_serviced": throttleServiced,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_bps_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
}
}
func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_bps_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
}
}
func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_iops_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
}
}
func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
throttleBefore = `8:0 1024`
)
td := configs.NewThrottleDevice(8, 0, 2048)
throttleAfter := td.String()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_iops_device": throttleBefore,
})
helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td}
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
}
}

View file

@ -22,21 +22,59 @@ func (s *CpuGroup) Name() string {
func (s *CpuGroup) Apply(d *cgroupData) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
_, err := d.join("cpu")
path, err := d.path("cpu")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return s.ApplyDir(path, d.config, d.pid)
}
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
// This might happen if we have no cpu cgroup mounted.
// Just do nothing and don't fail.
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// We should set the real-time group scheduling settings before moving
// the process in, because if the process is already in SCHED_RR mode
// and no RT bandwidth is set, adding it will fail.
if err := s.SetRtSched(path, cgroup); err != nil {
return err
}
// Because we are not using d.join, we need to place the pid into the procs file
// ourselves, unlike with the other subsystems.
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
return err
}
return nil
}
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuPeriod != 0 {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
return err
}
}
@ -45,15 +83,8 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
if err := s.SetRtSched(path, cgroup); err != nil {
return err
}
return nil

View file

@ -0,0 +1,209 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
func TestCpuSetShares(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
sharesBefore = 1024
sharesAfter = 512
)
helper.writeFileContents(map[string]string{
"cpu.shares": strconv.Itoa(sharesBefore),
})
helper.CgroupData.config.Resources.CpuShares = sharesAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares")
if err != nil {
t.Fatalf("Failed to parse cpu.shares - %s", err)
}
if value != sharesAfter {
t.Fatal("Got the wrong value, set cpu.shares failed.")
}
}
func TestCpuSetBandWidth(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
quotaBefore = 8000
quotaAfter = 5000
periodBefore = 10000
periodAfter = 7000
rtRuntimeBefore = 8000
rtRuntimeAfter = 5000
rtPeriodBefore = 10000
rtPeriodAfter = 7000
)
helper.writeFileContents(map[string]string{
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
}
if quota != quotaAfter {
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
}
period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
}
if period != periodAfter {
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
}
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
}
if rtRuntime != rtRuntimeAfter {
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
}
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
}
if rtPeriod != rtPeriodAfter {
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
}
}
func TestCpuStats(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
nrPeriods = 2000
nrThrottled = 200
throttledTime = uint64(18446744073709551615)
)
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
nrPeriods, nrThrottled, throttledTime)
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.ThrottlingData{
Periods: nrPeriods,
ThrottledPeriods: nrThrottled,
ThrottledTime: throttledTime}
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
}
func TestNoCpuStatFile(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal("Expected not to fail, but did")
}
}
func TestInvalidCpuStat(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpuStatContent := `nr_periods 2000
nr_throttled 200
throttled_time fortytwo`
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failed stat parsing.")
}
}
func TestCpuSetRtSchedAtApply(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
rtRuntimeBefore = 0
rtRuntimeAfter = 5000
rtPeriodBefore = 0
rtPeriodAfter = 7000
)
helper.writeFileContents(map[string]string{
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
cpu := &CpuGroup{}
if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
t.Fatal(err)
}
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
}
if rtRuntime != rtRuntimeAfter {
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
}
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
}
if rtPeriod != rtPeriodAfter {
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
}
pid, err := getCgroupParamUint(helper.CgroupPath, "cgroup.procs")
if err != nil {
t.Fatalf("Failed to parse cgroup.procs - %s", err)
}
if pid != 1234 {
t.Fatal("Got the wrong value, set cgroup.procs failed.")
}
}

View file

@ -8,7 +8,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@ -58,16 +57,34 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
if dir == "" {
return nil
}
root, err := getCgroupRoot()
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
if err != nil {
return err
}
if err := s.ensureParent(dir, root); err != nil {
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
// 'ensureParent' starts with the parent because we don't want to
// explicitly inherit from the parent; it could conflict with
// 'cpuset.cpu_exclusive'.
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
return err
}
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
// We didn't inherit cpuset configs from the parent, but we have
// to ensure cpuset configs are set before moving the task into
// the cgroup.
// The logic is: if the user specified cpuset configs, use those;
// otherwise, inherit from the parent. This makes cpuset configs
// work correctly with 'cpuset.cpu_exclusive', and keeps backward
// compatibility.
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
return err
}
// Because we are not using d.join, we need to place the pid into the procs file
// ourselves, unlike with the other subsystems.
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
return err
}
@ -137,3 +154,10 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
func (s *CpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
if err := s.Set(path, cgroup); err != nil {
return err
}
return s.copyIfNeeded(path, filepath.Dir(path))
}
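// Illustration only (hedged sketch, not the vendored implementation): the
// inherit-from-parent fallback described above boils down to copying
// cpuset.cpus and cpuset.mems from the parent directory whenever the child's
// file is still empty, e.g.:
//
//	for _, file := range []string{"cpuset.cpus", "cpuset.mems"} {
//		current, _ := ioutil.ReadFile(filepath.Join(dir, file))
//		if len(bytes.Trim(current, "\n")) == 0 {
//			parent, _ := ioutil.ReadFile(filepath.Join(filepath.Dir(dir), file))
//			_ = ioutil.WriteFile(filepath.Join(dir, file), parent, 0644)
//		}
//	}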

View file

@ -0,0 +1,65 @@
// +build linux
package fs
import (
"testing"
)
func TestCpusetSetCpus(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
cpusBefore = "0"
cpusAfter = "1-3"
)
helper.writeFileContents(map[string]string{
"cpuset.cpus": cpusBefore,
})
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus")
if err != nil {
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
}
if value != cpusAfter {
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
}
}
func TestCpusetSetMems(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
memsBefore = "0"
memsAfter = "1"
)
helper.writeFileContents(map[string]string{
"cpuset.mems": memsBefore,
})
helper.CgroupData.config.Resources.CpusetMems = memsAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems")
if err != nil {
t.Fatalf("Failed to parse cpuset.mems - %s", err)
}
if value != memsAfter {
t.Fatal("Got the wrong value, set cpuset.mems failed.")
}
}

View file

@ -43,21 +43,23 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
}
return nil
}
if !cgroup.Resources.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
if cgroup.Resources.AllowAllDevices != nil {
if *cgroup.Resources.AllowAllDevices == false {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
}
}
for _, dev := range cgroup.Resources.DeniedDevices {

View file

@ -0,0 +1,98 @@
// +build linux
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
allowedDevices = []*configs.Device{
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
}
allowedList = "c 1:5 rwm"
deniedDevices = []*configs.Device{
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
}
deniedList = "c 1:3 rwm"
)
func TestDevicesSetAllow(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.deny": "a",
})
allowAllDevices := false
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("Got the wrong value, set devices.allow failed.")
}
// When AllowAllDevices is nil, devices.allow file should not be modified.
helper.CgroupData.config.Resources.AllowAllDevices = nil
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err = getCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
}
}
func TestDevicesSetDeny(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.allow": "a",
})
allowAllDevices := true
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.deny")
if err != nil {
t.Fatalf("Failed to parse devices.deny - %s", err)
}
if value != deniedList {
t.Fatal("Got the wrong value, set devices.deny failed.")
}
}
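// Illustration only: the tests above exercise the three states of the *bool
// Resources.AllowAllDevices field handled by DevicesGroup.Set (cfg and the
// variable names below are assumed caller-side code, not vendored):
//
//	allowAll := false
//	cfg.Resources.AllowAllDevices = &allowAll // "a" -> devices.deny, then each AllowedDevices entry -> devices.allow
//	allowAll = true
//	cfg.Resources.AllowAllDevices = &allowAll // "a" -> devices.allow
//	cfg.Resources.AllowAllDevices = nil       // leave the allow/deny-all policy untouched; only DeniedDevices entries are written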

View file

@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Resources.Freezer {
case configs.Frozen, configs.Thawed:
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
return err
}
for {
// In case this loop does not exit because it doesn't get the expected
// state, write this state again on each iteration, hoping it will be
// properly set this time. Otherwise, this loop could run forever,
// waiting for a state change that would never happen.
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
return err
}
state, err := readFile(path, "freezer.state")
if err != nil {
return err
@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
break
}
time.Sleep(1 * time.Millisecond)
}
case configs.Undefined:

View file

@ -0,0 +1,47 @@
// +build linux
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestFreezerSetState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"freezer.state": string(configs.Frozen),
})
helper.CgroupData.config.Resources.Freezer = configs.Thawed
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "freezer.state")
if err != nil {
t.Fatalf("Failed to parse freezer.state - %s", err)
}
if value != string(configs.Thawed) {
t.Fatal("Got the wrong value, set freezer.state failed.")
}
}
func TestFreezerSetInvalidState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
const (
invalidArg configs.FreezerState = "Invalid"
)
helper.CgroupData.config.Resources.Freezer = invalidArg
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
t.Fatal("Failed to return invalid argument error")
}
}
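// Illustration only: a caller-side pause/resume sequence built on the Set
// method exercised above (path and cfg are assumed to already exist):
//
//	cfg.Resources.Freezer = configs.Frozen
//	if err := (&FreezerGroup{}).Set(path, cfg); err != nil { /* handle error */ }
//	// ... the tasks in the cgroup are now frozen ...
//	cfg.Resources.Freezer = configs.Thawed
//	if err := (&FreezerGroup{}).Set(path, cfg); err != nil { /* handle error */ }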

View file

@ -0,0 +1,154 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
hugetlbUsageContents = "128\n"
hugetlbMaxUsageContents = "256\n"
hugetlbFailcnt = "100\n"
)
var (
usage = "hugetlb.%s.usage_in_bytes"
limit = "hugetlb.%s.limit_in_bytes"
maxUsage = "hugetlb.%s.max_usage_in_bytes"
failcnt = "hugetlb.%s.failcnt"
)
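// Illustration only: entries of HugePageSizes (for example "2MB" or "1GB") are
// substituted into the format strings above, so for a 2MB page size:
//
//	fmt.Sprintf(usage, "2MB") // "hugetlb.2MB.usage_in_bytes"
//	fmt.Sprintf(limit, "2MB") // "hugetlb.2MB.limit_in_bytes"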
func TestHugetlbSetHugetlb(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
const (
hugetlbBefore = 256
hugetlbAfter = 512
)
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
})
}
for _, pageSize := range HugePageSizes {
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
{
Pagesize: pageSize,
Limit: hugetlbAfter,
},
}
hugetlb := &HugetlbGroup{}
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
}
for _, pageSize := range HugePageSizes {
limit := fmt.Sprintf(limit, pageSize)
value, err := getCgroupParamUint(helper.CgroupPath, limit)
if err != nil {
t.Fatalf("Failed to parse %s - %s", limit, err)
}
if value != hugetlbAfter {
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
}
}
}
func TestHugetlbStats(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
for _, pageSize := range HugePageSizes {
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
}
}
func TestHugetlbStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
maxUsage: hugetlbMaxUsageContents,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
for _, pageSize := range HugePageSizes {
helper.writeFileContents(map[string]string{
fmt.Sprintf(usage, pageSize): "bad",
maxUsage: hugetlbMaxUsageContents,
})
}
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: hugetlbUsageContents,
maxUsage: "bad",
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}

View file

@ -5,13 +5,23 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall" // only for Errno
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)
type MemoryGroup struct {
@ -25,20 +35,23 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
path, err := d.path("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
} else if path == "" {
return nil
}
if memoryAssigned(d.config) {
if path != "" {
if _, err := os.Stat(path); os.IsNotExist(err) {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
return err
// Only enable kernel memory accounting when this cgroup
// is created by libcontainer; otherwise we might get an
// error when people use `cgroupsPath` to join an existing
// cgroup whose kernel memory is not initialized.
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
}
}
defer func() {
if err != nil {
os.RemoveAll(path)
@ -54,21 +67,57 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled.
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup, and the limit cannot be set once the
// cgroup has children or there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}
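// Illustration only (hedged sketch): the probe above must run before any task
// joins the cgroup, so a caller-side ordering looks roughly like:
//
//	if err := os.MkdirAll(path, 0755); err != nil { /* handle error */ }
//	if err := EnableKernelMemoryAccounting(path); err != nil { /* handle error */ }
//	if err := cgroups.WriteCgroupProc(path, pid); err != nil { /* handle error */ }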
func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY".
// EBUSY is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup.
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1, we should also
// set swap to -1; it means unlimited memory.
if cgroup.Resources.Memory == -1 {
// Only set swap if it's enabled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
}
}
// When memory and swap memory are both set, we need to handle the cases
// for updating the container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
@ -77,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// When updating the memory limit, we should adapt the write sequence
// for memory and swap memory so it won't fail because the new
// value and the old value don't fit the kernel's validation.
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap != 0 {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
@ -113,11 +162,18 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
return err
}
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err
@ -130,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
}
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
} else if *cgroup.Resources.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err
}
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
}
return nil
@ -187,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
}
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
value, err := getCgroupParamUint(path, useHierarchy)
if err != nil {
return err
}
if value == 1 {
stats.MemoryStats.UseHierarchy = true
}
return nil
}
@ -197,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.KernelMemoryTCP > 0 ||
cgroup.Resources.OomKillDisable ||
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {

View file

@ -0,0 +1,455 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
memoryStatContents = `cache 512
rss 1024`
memoryUsageContents = "2048\n"
memoryMaxUsageContents = "4096\n"
memoryFailcnt = "100\n"
memoryLimitContents = "8192\n"
memoryUseHierarchyContents = "1\n"
)
func TestMemorySetMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 314572800 // 300M
memoryAfter = 524288000 // 500M
reservationBefore = 209715200 // 200M
reservationAfter = 314572800 // 300M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
}
if value != reservationAfter {
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
}
}
func TestMemorySetMemoryswap(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryswapBefore = 314572800 // 300M
memoryswapAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
})
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 314572800 // 300M
memoryswapBefore = 524288000 // 500M
memoryAfter = 629145600 // 600M
memoryswapAfter = 838860800 // 800M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
// Set will call getMemoryData when memory and swap memory are
// both set; fake these fields so we don't get an error.
"memory.usage_in_bytes": "0",
"memory.max_usage_in_bytes": "0",
"memory.failcnt": "0",
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 629145600 // 600M
memoryswapBefore = 838860800 // 800M
memoryAfter = 314572800 // 300M
memoryswapAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
// Set will call getMemoryData when memory and swap memory are
// both set; fake these fields so we don't get an error.
"memory.usage_in_bytes": "0",
"memory.max_usage_in_bytes": "0",
"memory.failcnt": "0",
})
helper.CgroupData.config.Resources.Memory = memoryAfter
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetKernelMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
kernelMemoryBefore = 314572800 // 300M
kernelMemoryAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
})
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
}
if value != kernelMemoryAfter {
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
}
}
func TestMemorySetKernelMemoryTCP(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
kernelMemoryTCPBefore = 314572800 // 300M
kernelMemoryTCPAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
})
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
}
if value != kernelMemoryTCPAfter {
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
}
}
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
swappinessBefore := 60 //default is 60
swappinessAfter := uint64(0)
helper.writeFileContents(map[string]string{
"memory.swappiness": strconv.Itoa(swappinessBefore),
})
helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness")
if err != nil {
t.Fatalf("Failed to parse memory.swappiness - %s", err)
}
if value != swappinessAfter {
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
}
}
func TestMemoryStats(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.failcnt": memoryFailcnt,
"memory.memsw.usage_in_bytes": memoryUsageContents,
"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
"memory.memsw.failcnt": memoryFailcnt,
"memory.memsw.limit_in_bytes": memoryLimitContents,
"memory.kmem.usage_in_bytes": memoryUsageContents,
"memory.kmem.max_usage_in_bytes": memoryMaxUsageContents,
"memory.kmem.failcnt": memoryFailcnt,
"memory.kmem.limit_in_bytes": memoryLimitContents,
"memory.use_hierarchy": memoryUseHierarchyContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
}
func TestMemoryStatsNoStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
}
func TestMemoryStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": "rss rss",
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": "bad",
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": "bad",
"memory.limit_in_bytes": memoryLimitContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.limit_in_bytes": "bad",
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemorySetOomControl(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
oomKillDisable = 1 // disable oom killer, default is 0
)
helper.writeFileContents(map[string]string{
"memory.oom_control": strconv.Itoa(oomKillDisable),
})
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
if err != nil {
t.Fatalf("Failed to parse memory.oom_control - %s", err)
}
if value != oomKillDisable {
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
}
}

View file

@ -3,6 +3,8 @@
package fs
import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -23,8 +25,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
if cgroup.Resources.NetClsClassid != 0 {
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
return err
}
}

View file

@ -0,0 +1,39 @@
// +build linux
package fs
import (
"strconv"
"testing"
)
const (
classidBefore = 0x100002
classidAfter = 0x100001
)
func TestNetClsSetClassid(t *testing.T) {
helper := NewCgroupTestUtil("net_cls", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
})
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
netcls := &NetClsGroup{}
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
// Since we are in a mock environment, we can't read the real classid back from
// net_cls.classid, so we just check that the value was written to the file
// successfully.
value, err := getCgroupParamUint(helper.CgroupPath, "net_cls.classid")
if err != nil {
t.Fatalf("Failed to parse net_cls.classid - %s", err)
}
if value != classidAfter {
t.Fatal("Got the wrong value, set net_cls.classid failed.")
}
}

View file

@ -0,0 +1,38 @@
// +build linux
package fs
import (
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
prioMap = []*configs.IfPrioMap{
{
Interface: "test",
Priority: 5,
},
}
)
func TestNetPrioSetIfPrio(t *testing.T) {
helper := NewCgroupTestUtil("net_prio", t)
defer helper.cleanup()
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
netPrio := &NetPrioGroup{}
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
if err != nil {
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
}
if !strings.Contains(value, "test 5") {
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
}
}

View file

@ -0,0 +1,111 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
maxUnlimited = -1
maxLimited = 1024
)
func TestPidsSetMax(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.max": "max",
})
helper.CgroupData.config.Resources.PidsLimit = maxLimited
pids := &PidsGroup{}
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "pids.max")
if err != nil {
t.Fatalf("Failed to parse pids.max - %s", err)
}
if value != maxLimited {
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
}
}
func TestPidsSetUnlimited(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.max": strconv.Itoa(maxLimited),
})
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
pids := &PidsGroup{}
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "pids.max")
if err != nil {
t.Fatalf("Failed to parse pids.max - %s", err)
}
if value != "max" {
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
}
}
func TestPidsStats(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.current": strconv.Itoa(1337),
"pids.max": strconv.Itoa(maxLimited),
})
pids := &PidsGroup{}
stats := *cgroups.NewStats()
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
t.Fatal(err)
}
if stats.PidsStats.Current != 1337 {
t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
}
if stats.PidsStats.Limit != maxLimited {
t.Fatalf("Expected %d, got %d for pids.max", maxLimited, stats.PidsStats.Limit)
}
}
func TestPidsStatsUnlimited(t *testing.T) {
helper := NewCgroupTestUtil("pids", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"pids.current": strconv.Itoa(4096),
"pids.max": "max",
})
pids := &PidsGroup{}
stats := *cgroups.NewStats()
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
t.Fatal(err)
}
if stats.PidsStats.Current != 4096 {
t.Fatalf("Expected %d, got %d for pids.current", 4096, stats.PidsStats.Current)
}
if stats.PidsStats.Limit != 0 {
t.Fatalf("Expected %d, got %d for pids.max", 0, stats.PidsStats.Limit)
}
}

View file

@ -0,0 +1,123 @@
// +build linux
package fs
import (
"fmt"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/sirupsen/logrus"
)
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
if len(expected) != len(actual) {
return fmt.Errorf("blkioStatEntries length do not match")
}
for i, expValue := range expected {
actValue := actual[i]
if expValue != actValue {
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
}
}
return nil
}
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
t.Fail()
}
}
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
if expected != actual {
logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
if expected != actual {
logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
expectMemoryDataEquals(t, expected.Usage, actual.Usage)
expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
if expected.UseHierarchy != actual.UseHierarchy {
logrus.Printf("Expected memory use hiearchy %v, but found %v\n", expected.UseHierarchy, actual.UseHierarchy)
t.Fail()
}
for key, expValue := range expected.Stats {
actValue, ok := actual.Stats[key]
if !ok {
logrus.Printf("Expected memory stat key %s not found\n", key)
t.Fail()
}
if expValue != actValue {
logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
t.Fail()
}
}
}
func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
if expected.Usage != actual.Usage {
logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
t.Fail()
}
if expected.MaxUsage != actual.MaxUsage {
logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
t.Fail()
}
if expected.Failcnt != actual.Failcnt {
logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
t.Fail()
}
if expected.Limit != actual.Limit {
logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit)
t.Fail()
}
}

View file

@ -0,0 +1,67 @@
// +build linux
/*
Utility for testing cgroup operations.
Creates a mock of the cgroup filesystem for the duration of the test.
*/
package fs
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
type cgroupTestUtil struct {
// cgroup data to use in tests.
CgroupData *cgroupData
// Path to the mock cgroup directory.
CgroupPath string
// Temporary directory to store mock cgroup filesystem.
tempDir string
t *testing.T
}
// Creates a new test util for the specified subsystem
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
d := &cgroupData{
config: &configs.Cgroup{},
}
d.config.Resources = &configs.Resources{}
tempDir, err := ioutil.TempDir("", "cgroup_test")
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testCgroupPath := filepath.Join(d.root, subsystem)
if err != nil {
t.Fatal(err)
}
// Ensure the full mock cgroup path exists.
err = os.MkdirAll(testCgroupPath, 0755)
if err != nil {
t.Fatal(err)
}
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
}
func (c *cgroupTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents to the mocks of the specified cgroup files.
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.CgroupPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View file

@ -12,7 +12,6 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View file

@ -0,0 +1,97 @@
// +build linux
package fs
import (
"io/ioutil"
"math"
"os"
"path/filepath"
"strconv"
"testing"
)
const (
cgroupFile = "cgroup.file"
floatValue = 2048.0
floatString = "2048"
)
func TestGetCgroupParamsInt(t *testing.T) {
// Setup tempdir.
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, cgroupFile)
// Success.
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
if err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with new line.
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with negative values
err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Success with a negative value equal to min int64
s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
err = ioutil.WriteFile(tempFile, []byte(s), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Not a float.
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
// Unknown file.
err = os.Remove(tempFile)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
}

View file

@ -51,6 +51,8 @@ type MemoryStats struct {
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
}

View file

@ -1,4 +1,4 @@
// +build !linux
// +build !linux static_build
package systemd
@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
return nil, fmt.Errorf("Systemd not supported")
return fmt.Errorf("Systemd not supported")
}
func (m *Manager) Freeze(state configs.FreezerState) error {

View file

@ -1,14 +1,12 @@
// +build linux
// +build linux,!static_build
package systemd
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
@ -19,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)
type Manager struct {
@ -67,13 +66,16 @@ var subsystems = subsystemSet{
const (
testScopeWait = 4
testSliceWait = 4
)
var (
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasTransientDefaultDependencies bool
hasDelegate bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@ -146,6 +148,48 @@ func UseSystemd() bool {
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegate = true
dl := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegate = false
}
}
}
// Assume we have the ability to start a transient unit as a slice
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
hasStartTransientSliceUnit = true
// To ensure simple clean-up, we create a slice off the root with no hierarchy
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
if _, ok := err.(dbus.Error); ok {
hasStartTransientSliceUnit = false
}
}
for i := 0; i <= testSliceWait; i++ {
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
hasStartTransientSliceUnit = false
break
}
}
} else {
break
}
time.Sleep(time.Millisecond)
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
theConn.StopUnit(slice, "replace", nil)
}
return hasStartTransientUnit
}
@ -179,13 +223,29 @@ func (m *Manager) Apply(pid int) error {
slice = c.Parent
}
properties = append(properties,
systemdDbus.PropSlice(slice),
systemdDbus.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
if !hasStartTransientSliceUnit {
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
}
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
properties = append(properties, systemdDbus.PropSlice(slice))
}
// only add the pid if it's valid; -1 is used with general slice creation.
if pid != -1 {
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
}
if hasDelegate {
// This is only supported on systemd versions 218 and above.
newProp("Delegate", true),
)
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
@ -206,7 +266,21 @@ func (m *Manager) Apply(pid int) error {
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", uint64(c.Resources.CpuShares)))
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
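// Illustrative arithmetic (editor's example, not from the original source):
// with CpuQuota=5000 and CpuPeriod=30000, cpuQuotaPerSecUSec is
// 5000*1000000/30000 = 166666, which is not a multiple of 10000, so it is
// rounded up to 170000 (17% of a CPU) before being handed to systemd.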
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
@ -214,20 +288,25 @@ func (m *Manager) Apply(pid int) error {
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
// We need to set kernel memory before processes join cgroup because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
// And swap memory limit needs to be set after memory limit, only
// memory limit is handled by systemd, so it's kind of ugly here.
if c.Resources.KernelMemory > 0 {
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
if err := setKernelMemory(c); err != nil {
return err
}
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil {
statusChan := make(chan string)
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
return err
}
select {
case <-statusChan:
case <-time.After(time.Second):
logrus.Warnf("Timed out while waiting for StartTransientUnit completion signal from dbus. Continuing...")
}
if err := joinCgroups(c, pid); err != nil {
return err
}
@ -269,15 +348,6 @@ func (m *Manager) GetPaths() map[string]string {
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
path, err := getSubsystemPath(c, subsystem)
if err != nil {
@ -286,10 +356,9 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
return "", err
}
return path, nil
}
@ -299,7 +368,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
switch name {
case "name=systemd":
// let systemd handle this
break
case "cpuset":
path, err := getSubsystemPath(c, name)
if err != nil && !cgroups.IsNotFound(err) {
@ -309,7 +377,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
if err := s.ApplyDir(path, c, pid); err != nil {
return err
}
break
default:
_, err := join(c, name, pid)
if err != nil {
@ -331,10 +398,10 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
return nil
}
// systemd represents slice heirarchy using `-`, so we need to follow suit when
// systemd represents slice hierarchy using `-`, so we need to follow suit when
// generating the path of slice. Essentially, test-a-b.slice becomes
// test.slice/test-a.slice/test-a-b.slice.
func expandSlice(slice string) (string, error) {
// /test.slice/test-a.slice/test-a-b.slice.
func ExpandSlice(slice string) (string, error) {
suffix := ".slice"
// Name has to end with ".slice", but can't be just ".slice".
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
@ -348,6 +415,10 @@ func expandSlice(slice string) (string, error) {
var path, prefix string
sliceName := strings.TrimSuffix(slice, suffix)
// if input was -.slice, we should just return root now
if sliceName == "-" {
return "/", nil
}
for _, component := range strings.Split(sliceName, "-") {
// test--a.slice isn't permitted, nor is -test.slice.
if component == "" {
@ -355,10 +426,9 @@ func expandSlice(slice string) (string, error) {
}
// Append the component to the path and to the prefix.
path += prefix + component + suffix + "/"
path += "/" + prefix + component + suffix
prefix += component + "-"
}
return path, nil
}
@ -368,17 +438,19 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
slice := "system.slice"
if c.Parent != "" {
slice = c.Parent
}
slice, err = expandSlice(slice)
slice, err = ExpandSlice(slice)
if err != nil {
return "", err
}
@ -439,6 +511,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range subsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())
@ -460,7 +537,11 @@ func (m *Manager) Set(container *configs.Config) error {
}
func getUnitName(c *configs.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
// by default, we create a scope unless the user explicitly asks for a slice.
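// For example (illustrative): with ScopePrefix "docker" and Name "abc123" the
// unit name is "docker-abc123.scope", while a Name of "machine-test.slice" is
// returned unchanged.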
if !strings.HasSuffix(c.Name, ".slice") {
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
}
return c.Name
}
func setKernelMemory(c *configs.Cgroup) error {
@ -472,8 +553,15 @@ func setKernelMemory(c *configs.Cgroup) error {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// This doesn't get called by manager.Set, so we need to do it here.
s := &fs.MemoryGroup{}
return s.SetKernelMemory(path, c)
return fs.EnableKernelMemoryAccounting(path)
}
// isUnitExists returns true if the error is that a systemd unit already exists.
func isUnitExists(err error) bool {
if err != nil {
if dbusError, ok := err.(dbus.Error); ok {
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
}
}
return false
}

View file

@ -16,37 +16,24 @@ import (
"github.com/docker/go-units"
)
const cgroupNamePrefix = "name="
const (
cgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient;
// parsing it directly is about 10x faster because it avoids Sscanf.
// It was one of two major performance drawbacks in container start.
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient;
// parsing it directly is about 10x faster because it avoids Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@ -70,6 +57,30 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func GetClosestMountpointAncestor(dir, mountinfo string) string {
deepestMountPoint := ""
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
mountInfoParts := strings.Fields(mountInfoEntry)
if len(mountInfoParts) < 5 {
continue
}
mountPoint := mountInfoParts[4]
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
deepestMountPoint = mountPoint
}
}
return deepestMountPoint
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@ -113,7 +124,7 @@ type Mount struct {
Subsystems []string
}
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
@ -121,16 +132,17 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
for scanner.Scan() {
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+9] != "cgroup" {
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
@ -139,12 +151,17 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if ss[opt] {
} else {
m.Subsystems = append(m.Subsystems, opt)
}
if !all {
numFound++
}
}
res = append(res, m)
}
@ -154,23 +171,25 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
return res, nil
}
func GetCgroupMounts() ([]Mount, error) {
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
func GetCgroupMounts(all bool) ([]Mount, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
all, err := GetAllSubsystems()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
for s := range allSubsystems {
allMap[s] = true
}
return getCgroupMountsHelper(allMap, f)
return getCgroupMountsHelper(allMap, f, all)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
@ -185,9 +204,6 @@ func GetAllSubsystems() ([]string, error) {
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
@ -196,11 +212,14 @@ func GetAllSubsystems() ([]string, error) {
}
}
}
if err := s.Err(); err != nil {
return nil, err
}
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
@ -209,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
@ -219,8 +246,33 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from the host, which don't exist in the container.
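// For example (illustrative): if mountinfo reports root "/a/b" for the
// subsystem and /proc/self/cgroup reports "/a/b/c", relCgroup becomes "c" and
// the returned path is filepath.Join(mnt, "c") instead of the host-only path.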
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
return nil, err
}
@ -243,6 +295,8 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -250,21 +304,35 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
s := bufio.NewScanner(f)
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
cgroups := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
}
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
}
}
if err := s.Err(); err != nil {
return nil, err
}
return cgroups, nil
}
@ -291,8 +359,7 @@ func PathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
if err := WriteCgroupProc(path, pid); err != nil {
return err
}
}
@ -361,7 +428,7 @@ func GetAllPids(path string) ([]int, error) {
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != "cgroup.procs" {
if file != CgroupProcesses {
return nil
}
if iErr != nil {
@ -376,3 +443,20 @@ func GetAllPids(path string) ([]int, error) {
})
return pids, err
}
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
func WriteCgroupProc(dir string, pid int) error {
// Normally dir should not be empty; one case is that the cgroup subsystem
// is not mounted, so we get an empty dir, and we want the write to fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}
// Don't attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
}
return nil
}

View file

@ -0,0 +1,333 @@
// +build linux
package cgroups
import (
"bytes"
"fmt"
"reflect"
"strings"
"testing"
)
const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755
18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw
20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755
23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755
24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw
26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children
27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children
28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children
29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children
30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children
31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children
32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children
33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children
34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children
35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered
36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel
38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel
39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel
40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw
42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw
43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw
45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered
46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered
47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered
48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered
121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000
124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw
165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered
171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered
175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered
179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered
183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered
187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered
191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered
195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered
199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered
203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered
207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered
211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered
215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered
219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered
223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered
227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered
231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered
235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered
239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered
243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered
247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered
31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1`
const systemdMountinfo = `115 83 0:32 / / rw,relatime - aufs none rw,si=c0bd3d3,dio,dirperm1
116 115 0:35 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
117 115 0:36 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
118 117 0:37 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
119 115 0:38 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
120 119 0:39 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
121 120 0:19 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
122 120 0:20 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
123 120 0:21 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
124 120 0:22 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
125 120 0:23 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
126 120 0:24 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
127 120 0:25 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
128 120 0:26 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
129 120 0:27 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event,release_agent=/run/cgmanager/agents/cgm-release-agent.perf_event
130 115 43:0 /var/lib/docker/volumes/a44a712176377f57c094397330ee04387284c478364eb25f4c3d25f775f25c26/_data /var/lib/docker rw,relatime - ext4 /dev/nbd0 rw,data=ordered
131 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/nbd0 rw,data=ordered
132 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hostname /etc/hostname rw,relatime - ext4 /dev/nbd0 rw,data=ordered
133 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hosts /etc/hosts rw,relatime - ext4 /dev/nbd0 rw,data=ordered
134 117 0:33 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
135 117 0:13 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
136 117 0:12 /1 /dev/console rw,nosuid,noexec,relatime - devpts none rw,gid=5,mode=620,ptmxmode=000
84 115 0:40 / /tmp rw,relatime - tmpfs none rw`
const cgroup2Mountinfo = `18 64 0:18 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
19 64 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
20 64 0:6 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8171204k,nr_inodes=2042801,mode=755
21 18 0:19 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
22 20 0:20 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
23 20 0:21 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
24 64 0:22 / /run rw,nosuid,nodev shared:24 - tmpfs tmpfs rw,seclabel,mode=755
25 18 0:23 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755
26 25 0:24 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - cgroup2 cgroup rw
27 18 0:25 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw,seclabel
28 18 0:26 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime shared:21 - efivarfs efivarfs rw
29 25 0:27 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,cpu,cpuacct
30 25 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,memory
31 25 0:29 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,net_cls,net_prio
32 25 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,blkio
33 25 0:31 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,perf_event
34 25 0:32 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,hugetlb
35 25 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,freezer
36 25 0:34 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,cpuset
37 25 0:35 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices
38 25 0:36 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,pids
61 18 0:37 / /sys/kernel/config rw,relatime shared:22 - configfs configfs rw
64 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/fedora_dhcp--16--129-root rw,seclabel,data=ordered
39 18 0:17 / /sys/fs/selinux rw,relatime shared:23 - selinuxfs selinuxfs rw
40 20 0:16 / /dev/mqueue rw,relatime shared:25 - mqueue mqueue rw,seclabel
41 20 0:39 / /dev/hugepages rw,relatime shared:26 - hugetlbfs hugetlbfs rw,seclabel
`
func TestGetCgroupMounts(t *testing.T) {
type testData struct {
mountInfo string
root string
subsystems map[string]bool
}
testTable := []testData{
{
mountInfo: fedoraMountinfo,
root: "/",
subsystems: map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"hugetlb": true,
},
},
{
mountInfo: systemdMountinfo,
root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope",
subsystems: map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
},
},
}
for _, td := range testTable {
mi := bytes.NewBufferString(td.mountInfo)
cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false)
if err != nil {
t.Fatal(err)
}
cgMap := make(map[string]Mount)
for _, m := range cgMounts {
for _, ss := range m.Subsystems {
cgMap[ss] = m
}
}
for ss := range td.subsystems {
m, ok := cgMap[ss]
if !ok {
t.Fatalf("%s not found", ss)
}
if m.Root != td.root {
t.Fatalf("unexpected root for %s: %s", ss, m.Root)
}
if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) {
t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint)
}
var ssFound bool
for _, mss := range m.Subsystems {
if mss == ss {
ssFound = true
break
}
}
if !ssFound {
t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems)
}
}
}
}
func BenchmarkGetCgroupMounts(b *testing.B) {
subsystems := map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"hugetlb": true,
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
mi := bytes.NewBufferString(fedoraMountinfo)
b.StartTimer()
if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil {
b.Fatal(err)
}
}
}
func TestParseCgroupString(t *testing.T) {
testCases := []struct {
input string
expectedError error
expectedOutput map[string]string
}{
{
// Taken from a CoreOS instance running systemd 225 with CPU/Mem
// accounting enabled in systemd
input: `9:blkio:/
8:freezer:/
7:perf_event:/
6:devices:/system.slice/system-sshd.slice
5:cpuset:/
4:cpu,cpuacct:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
3:net_cls,net_prio:/
2:memory:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
1:name=systemd:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service`,
expectedOutput: map[string]string{
"name=systemd": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"blkio": "/",
"freezer": "/",
"perf_event": "/",
"devices": "/system.slice/system-sshd.slice",
"cpuset": "/",
"cpu": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"cpuacct": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
"net_cls": "/",
"net_prio": "/",
"memory": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
},
},
{
input: `malformed input`,
expectedError: fmt.Errorf(`invalid cgroup entry: must contain at least two colons: malformed input`),
},
}
for ndx, testCase := range testCases {
out, err := parseCgroupFromReader(strings.NewReader(testCase.input))
if err != nil {
if testCase.expectedError == nil || testCase.expectedError.Error() != err.Error() {
t.Errorf("%v: expected error %v, got error %v", ndx, testCase.expectedError, err)
}
} else {
if !reflect.DeepEqual(testCase.expectedOutput, out) {
t.Errorf("%v: expected output %v, got error %v", ndx, testCase.expectedOutput, out)
}
}
}
}
func TestIgnoreCgroup2Mount(t *testing.T) {
subsystems := map[string]bool{
"cpuset": true,
"cpu": true,
"cpuacct": true,
"memory": true,
"devices": true,
"freezer": true,
"net_cls": true,
"blkio": true,
"perf_event": true,
"pids": true,
"name=systemd": true,
}
mi := bytes.NewBufferString(cgroup2Mountinfo)
cgMounts, err := getCgroupMountsHelper(subsystems, mi, false)
if err != nil {
t.Fatal(err)
}
for _, m := range cgMounts {
if m.Mountpoint == "/sys/fs/cgroup/systemd" {
t.Errorf("parsed a cgroup2 mount at /sys/fs/cgroup/systemd instead of ignoring it")
}
}
}
const fakeMountInfo = ` 18 24 0:17 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
100 99 1:31 / /foo/bar rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz2 rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/bazza rw,relatime - fake fake rw,fake
100 99 1:31 / /foo/bar/baz3 rw,relatime - fake fake rw,fake
100 99 1:31 / /foo rw,relatime - fake fake rw,fake
100 99 1:31 / /unrelated rw,relatime - fake fake rw,fake
100 99 1:31 / / rw,relatime - fake fake rw,fake
`
func TestGetClosestMountpointAncestor(t *testing.T) {
testCases := []struct {
input string
mountinfos string
output string
}{
{input: "/foo/bar/baz/a/b/c", mountinfos: fakeMountInfo, output: "/foo/bar/baz"},
{input: "/foo/bar/baz", mountinfos: fakeMountInfo, output: "/foo/bar/baz"},
{input: "/foo/bar/bazza", mountinfos: fakeMountInfo, output: "/foo/bar/bazza"},
{input: "/a/b/c/d", mountinfos: fakeMountInfo, output: "/"},
}
for _, c := range testCases {
mountpoint := GetClosestMountpointAncestor(c.input, c.mountinfos)
if mountpoint != c.output {
t.Errorf("expected %s, got %s", c.output, mountpoint)
}
}
}
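The table above pins down the expected behavior of GetClosestMountpointAncestor: among the mountpoints listed in the mountinfo text (field 5 of each line), return the longest one that is a prefix of the input path. A standalone sketch of that contract, not necessarily the vendored implementation:
package main

import (
	"fmt"
	"strings"
)

// closestMountpointAncestor returns the longest mountpoint (5th field of a
// mountinfo line) that is a string prefix of path, mirroring the behavior
// the test above expects.
func closestMountpointAncestor(path, mountinfos string) string {
	best := ""
	for _, line := range strings.Split(mountinfos, "\n") {
		fields := strings.Fields(line)
		if len(fields) < 5 {
			continue
		}
		mountpoint := fields[4]
		if strings.HasPrefix(path, mountpoint) && len(mountpoint) > len(best) {
			best = mountpoint
		}
	}
	return best
}

func main() {
	mi := "100 99 1:31 / /foo/bar/baz rw,relatime - fake fake rw,fake\n" +
		"100 99 1:31 / / rw,relatime - fake fake rw,fake"
	fmt.Println(closestMountpointAncestor("/foo/bar/baz/a/b/c", mi)) // /foo/bar/baz
	fmt.Println(closestMountpointAncestor("/a/b/c/d", mi))           // /
}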

View file

@ -1,11 +0,0 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = false
}

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
type FreezerState string
@ -22,7 +20,7 @@ type Cgroup struct {
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
// ScopePrefix decribes prefix for the scope name
// ScopePrefix describes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the absolute cgroups paths to join.
@ -36,7 +34,7 @@ type Cgroup struct {
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
@ -60,19 +58,19 @@ type Resources struct {
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
CpuShares uint64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
CpuPeriod uint64 `json:"cpu_period"`
// How much time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_quota"`
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_period"`
CpuRtPeriod uint64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@ -95,7 +93,7 @@ type Resources struct {
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per divice, bytes per second.
// IO write rate limit per cgroup per device, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
@ -114,11 +112,11 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *int64 `json:"memory_swappiness"`
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
NetClsClassid uint32 `json:"net_cls_classid_u"`
}

View file

@ -1,6 +0,0 @@
// +build !windows,!linux,!freebsd
package configs
type Cgroup struct {
}

View file

@ -7,7 +7,9 @@ import (
"os/exec"
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type Rlimit struct {
@ -85,11 +87,6 @@ type Config struct {
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
@ -117,8 +114,8 @@ type Config struct {
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *Capabilities `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
@ -148,10 +145,6 @@ type Config struct {
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int `json:"oom_score_adj"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
@ -187,6 +180,17 @@ type Config struct {
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
}
type Hooks struct {
@ -201,6 +205,19 @@ type Hooks struct {
Poststop []Hook
}
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
// Effective is the set of capabilities checked by the kernel.
Effective []string
// Inheritable is the capabilities preserved across execve.
Inheritable []string
// Permitted is the limiting superset for effective capabilities.
Permitted []string
// Ambient is the ambient set of capabilities that are kept.
Ambient []string
}
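The struct above replaces the previous flat capability list with the five kernel capability sets. A hedged illustration of populating it; the capability names and the surrounding config variable are assumptions for the example, not values taken from this diff:
// Illustrative only: grant the same minimal set in every capability set.
// The Ambient set additionally needs kernel support to take effect.
caps := []string{"CAP_CHOWN", "CAP_NET_BIND_SERVICE"}
config.Capabilities = &Capabilities{
	Bounding:    caps,
	Effective:   caps,
	Inheritable: caps,
	Permitted:   caps,
	Ambient:     caps,
}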
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
@ -248,13 +265,7 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
// HookState is the payload provided to a hook on execution.
type HookState struct {
Version string `json:"ociVersion"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
BundlePath string `json:"bundlePath"`
}
type HookState specs.State
type Hook interface {
// Run executes the hook with the provided state.
@ -300,29 +311,38 @@ func (c Command) Run(s HookState) error {
if err != nil {
return err
}
var stdout, stderr bytes.Buffer
cmd := exec.Cmd{
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Stdout: &stdout,
Stderr: &stderr,
}
if err := cmd.Start(); err != nil {
return err
}
errC := make(chan error, 1)
go func() {
out, err := cmd.CombinedOutput()
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("%s: %s", err, out)
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
}()
var timerCh <-chan time.Time
if c.Timeout != nil {
select {
case err := <-errC:
return err
case <-time.After(*c.Timeout):
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
timer := time.NewTimer(*c.Timeout)
defer timer.Stop()
timerCh = timer.C
}
select {
case err := <-errC:
return err
case <-timerCh:
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
return <-errC
}
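One detail worth calling out in the rewritten Run above: timerCh is only assigned when a Timeout is set, and a receive from a nil channel never proceeds, so with no timeout the final select can only complete through errC. A self-contained sketch of the same pattern, with hypothetical names:
package main

import (
	"fmt"
	"time"
)

// runWithOptionalTimeout waits for the work feeding errC, or gives up once
// timeout elapses. A nil timeout means wait forever: timerCh stays a nil
// channel, so the second select case can never fire.
func runWithOptionalTimeout(errC <-chan error, timeout *time.Duration) error {
	var timerCh <-chan time.Time
	if timeout != nil {
		timer := time.NewTimer(*timeout)
		defer timer.Stop()
		timerCh = timer.C
	}
	select {
	case err := <-errC:
		return err
	case <-timerCh:
		return fmt.Errorf("ran past specified timeout of %.1fs", timeout.Seconds())
	}
}

func main() {
	errC := make(chan error, 1)
	go func() {
		time.Sleep(10 * time.Millisecond) // stand-in for cmd.Wait()
		errC <- nil
	}()
	timeout := time.Second
	fmt.Println(runWithOptionalTimeout(errC, &timeout)) // <nil>
}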

View file

@ -0,0 +1,61 @@
package configs
import "fmt"
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}
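The helper above is plain interval arithmetic: if containerID falls inside [ContainerID, ContainerID+Size), the host ID is HostID plus the offset into that range. A self-contained worked example with illustrative values:
package main

import "fmt"

// IDMap mirrors the shape of the mapping entries used above: Size container
// IDs starting at ContainerID map onto host IDs starting at HostID.
type IDMap struct {
	ContainerID, HostID, Size int
}

func hostID(containerID int, m IDMap) (int, bool) {
	if containerID >= m.ContainerID && containerID <= m.ContainerID+m.Size-1 {
		return m.HostID + (containerID - m.ContainerID), true
	}
	return -1, false
}

func main() {
	m := IDMap{ContainerID: 0, HostID: 100000, Size: 65536}
	fmt.Println(hostID(0, m))     // 100000 true  (container root)
	fmt.Println(hostID(123, m))   // 100123 true
	fmt.Println(hostID(70000, m)) // -1 false (outside the mapped range)
}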

View file

@ -0,0 +1,130 @@
package configs
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
)
func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("../sample_configs", name))
if err != nil {
return nil, err
}
defer f.Close()
var container *Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
// Check that a config doesn't contain extra fields
var configMap, abstractMap map[string]interface{}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
return nil, err
}
configData, err := json.Marshal(&container)
if err != nil {
return nil, err
}
if err := json.Unmarshal(configData, &configMap); err != nil {
return nil, err
}
for k := range configMap {
delete(abstractMap, k)
}
if len(abstractMap) != 0 {
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
}
return container, nil
}
func TestRemoveNamespace(t *testing.T) {
ns := Namespaces{
{Type: NEWNET},
}
if !ns.Remove(NEWNET) {
t.Fatal("NEWNET was not removed")
}
if len(ns) != 0 {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
}
}
func TestHostRootUIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostRootUID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
}
}
func TestHostRootUIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
UidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostRootUID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
}
}
func TestHostRootGIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostRootGID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
}
}
func TestHostRootGIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
GidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostRootGID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
}
}

View file

@ -0,0 +1,191 @@
package configs_test
import (
"encoding/json"
"fmt"
"os"
"reflect"
"testing"
"time"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestUnmarshalHooks(t *testing.T) {
timeout := time.Second
prestartCmd := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
prestart, err := json.Marshal(prestartCmd.Command)
if err != nil {
t.Fatal(err)
}
hook := configs.Hooks{}
err = hook.UnmarshalJSON([]byte(fmt.Sprintf(`{"Prestart" :[%s]}`, prestart)))
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(hook.Prestart[0], prestartCmd) {
t.Errorf("Expected prestart to equal %+v but it was %+v",
prestartCmd, hook.Prestart[0])
}
}
func TestUnmarshalHooksWithInvalidData(t *testing.T) {
hook := configs.Hooks{}
err := hook.UnmarshalJSON([]byte(`{invalid-json}`))
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestMarshalHooks(t *testing.T) {
timeout := time.Second
prestartCmd := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
hook := configs.Hooks{
Prestart: []configs.Hook{prestartCmd},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
h := `{"poststart":null,"poststop":null,"prestart":[{"path":"/var/vcap/hooks/prestart","args":["--pid=123"],"env":["FOO=BAR"],"dir":"/var/vcap","timeout":1000000000}]}`
if string(hooks) != h {
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
}
}
func TestMarshalUnmarshalHooks(t *testing.T) {
timeout := time.Second
prestart := configs.NewCommandHook(configs.Command{
Path: "/var/vcap/hooks/prestart",
Args: []string{"--pid=123"},
Env: []string{"FOO=BAR"},
Dir: "/var/vcap",
Timeout: &timeout,
})
hook := configs.Hooks{
Prestart: []configs.Hook{prestart},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
umMhook := configs.Hooks{}
err = umMhook.UnmarshalJSON(hooks)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(umMhook.Prestart[0], prestart) {
t.Errorf("Expected hooks to be equal after mashaling -> unmarshaling them: %+v, %+v", umMhook.Prestart[0], prestart)
}
}
func TestMarshalHooksWithUnexpectedType(t *testing.T) {
fHook := configs.NewFunctionHook(func(configs.HookState) error {
return nil
})
hook := configs.Hooks{
Prestart: []configs.Hook{fHook},
}
hooks, err := hook.MarshalJSON()
if err != nil {
t.Fatal(err)
}
h := `{"poststart":null,"poststop":null,"prestart":null}`
if string(hooks) != h {
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
}
}
func TestFuncHookRun(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
fHook := configs.NewFunctionHook(func(s configs.HookState) error {
if !reflect.DeepEqual(state, s) {
t.Errorf("Expected state %+v to equal %+v", state, s)
}
return nil
})
fHook.Run(state)
}
func TestCommandHookRun(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
timeout := time.Second
cmdHook := configs.NewCommandHook(configs.Command{
Path: os.Args[0],
Args: []string{os.Args[0], "-test.run=TestHelperProcess"},
Env: []string{"FOO=BAR"},
Dir: "/",
Timeout: &timeout,
})
err := cmdHook.Run(state)
if err != nil {
t.Errorf(fmt.Sprintf("Expected error to not occur but it was %+v", err))
}
}
func TestCommandHookRunTimeout(t *testing.T) {
state := configs.HookState{
Version: "1",
ID: "1",
Pid: 1,
Bundle: "/bundle",
}
timeout := (10 * time.Millisecond)
cmdHook := configs.NewCommandHook(configs.Command{
Path: os.Args[0],
Args: []string{os.Args[0], "-test.run=TestHelperProcessWithTimeout"},
Env: []string{"FOO=BAR"},
Dir: "/",
Timeout: &timeout,
})
err := cmdHook.Run(state)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestHelperProcess(*testing.T) {
fmt.Println("Helper Process")
os.Exit(0)
}
func TestHelperProcessWithTimeout(*testing.T) {
time.Sleep(time.Second)
}

View file

@ -1,51 +0,0 @@
// +build freebsd linux
package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View file

@ -0,0 +1,3 @@
package configs
// All current tests are for Unix-specific functionality

View file

@ -1,4 +1,4 @@
// +build linux freebsd
// +build linux
package configs
@ -107,19 +107,5 @@ var (
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work, because java
// insists on there being a /dev/fuse
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
)

View file

@ -0,0 +1,7 @@
package configs
type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
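A hedged example of a value matching the schema documented above; the cache ids and capacity bitmasks are illustrative and have to agree with what the kernel reports under /sys/fs/resctrl/info on the target machine (the surrounding config variable is also an assumption):
// Illustrative only: full mask on L3 cache id 0, upper half on cache id 1,
// assuming an 8-bit capacity bitmask.
config.IntelRdt = &IntelRdt{
	L3CacheSchema: "L3:0=ff;1=f0",
}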

View file

@ -1,5 +1,11 @@
package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
@ -22,6 +28,9 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`
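EXT_COPYUP above is a runc-specific extension: when a tmpfs is mounted over a directory that already has contents, the flag asks the rootfs setup to copy those contents up into the new tmpfs. A hedged sketch of a Mount requesting it; the paths and size are illustrative:
// Illustrative only: tmpfs over /var/lib/app that keeps the files the image
// already shipped there by copying them up into the new mount.
m := &Mount{
	Source:      "tmpfs",
	Device:      "tmpfs",
	Destination: "/var/lib/app",
	Data:        "size=64m",
	Extensions:  EXT_COPYUP,
}
_ = m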

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
import (
@ -22,8 +20,8 @@ var (
supportedNamespaces = make(map[NamespaceType]bool)
)
// nsToFile converts the namespace type to its filename
func nsToFile(ns NamespaceType) string {
// NsName converts the namespace type to its filename
func NsName(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
@ -50,7 +48,7 @@ func IsNamespaceSupported(ns NamespaceType) bool {
if ok {
return supported
}
nsFile := nsToFile(ns)
nsFile := NsName(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWUSER, // Keep user NS always first, don't move it.
NEWIPC,
NEWUTS,
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
@ -81,10 +79,7 @@ type Namespace struct {
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {

View file

@ -2,19 +2,19 @@
package configs
import "syscall"
import "golang.org/x/sys/unix"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct

View file

@ -4,12 +4,10 @@ package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View file

@ -1,4 +1,4 @@
// +build !linux,!freebsd
// +build !linux
package configs

View file

@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func hasIDMapping(id int, mappings []configs.IDMap) bool {
for _, m := range mappings {
if id >= m.ContainerID && id < m.ContainerID+m.Size {
return true
}
}
return false
}
func rootlessMappings(config *configs.Config) error {
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
}
if len(config.UidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") {
var uid int
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(uid, config.UidMappings) {
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
}
}
if strings.HasPrefix(opt, "gid=") {
var gid int
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(gid, config.GidMappings) {
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
}
}
}
}
return nil
}

View file

@ -0,0 +1,159 @@
package validate
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func init() {
geteuid = func() int { return 1337 }
getegid = func() int { return 7331 }
}
func rootlessConfig() *configs.Config {
return &configs.Config{
Rootfs: "/var",
Rootless: true,
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
UidMappings: []configs.IDMap{
{
HostID: geteuid(),
ContainerID: 0,
Size: 1,
},
},
GidMappings: []configs.IDMap{
{
HostID: getegid(),
ContainerID: 0,
Size: 1,
},
},
}
}
func TestValidateRootless(t *testing.T) {
validator := New()
config := rootlessConfig()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
/* rootlessMappings() */
func TestValidateRootlessUserns(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Namespaces = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if user namespaces not set")
}
}
func TestValidateRootlessMappingUid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.UidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no uid mappings provided")
}
}
func TestValidateRootlessMappingGid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.GidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no gid mappings provided")
}
}
/* rootlessMount() */
func TestValidateRootlessMountUid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "uid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting uid=5 in mount options")
}
config.Mounts[0].Data = "uid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err)
}
config.Mounts[0].Data = "uid=2"
config.UidMappings[0].Size = 10
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting uid=2 in mount options and UidMapping[0].size is 10")
}
config.Mounts[0].Data = "uid=20"
config.UidMappings[0].Size = 10
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting uid=20 in mount options and UidMapping[0].size is 10")
}
}
func TestValidateRootlessMountGid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "gid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting gid=5 in mount options")
}
config.Mounts[0].Data = "gid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err)
}
config.Mounts[0].Data = "gid=5"
config.GidMappings[0].Size = 10
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting gid=5 in mount options and GidMapping[0].size is 10")
}
config.Mounts[0].Data = "gid=11"
config.GidMappings[0].Size = 10
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10")
}
}

View file

@ -7,6 +7,8 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux"
)
type Validator interface {
@ -39,12 +41,26 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil {
return err
}
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}
// rootfs validates if the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
}
return err
}
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
@ -80,6 +96,10 @@ func (v *ConfigValidator) security(config *configs.Config) error {
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.GetEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}
@ -121,6 +141,11 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
}
if strings.HasPrefix(s, "net.") {
if config.Namespaces.Contains(configs.NEWNET) {
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
if err := checkHostNs(s, path); err != nil {
return err
}
}
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
@ -131,3 +156,57 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
return nil
}
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
}
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {
return false, err
}
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
}
// checkHostNs checks whether network sysctl is used in host namespace.
func checkHostNs(sysctlConfig string, path string) error {
var currentProcessNetns = "/proc/self/ns/net"
// readlink on the current processes network namespace
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
if err != nil {
return fmt.Errorf("read soft link %q error", currentProcessNetns)
}
// First check if the provided path is a symbolic link
symLink, err := isSymbolicLink(path)
if err != nil {
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
}
if !symLink {
// The provided namespace is not a symbolic link,
// it is not the host namespace.
return nil
}
// readlink on the path provided in the struct
destOfContainer, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("read soft link %q error", path)
}
if destOfContainer == destOfCurrentProcess {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
}
return nil
}
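The host-namespace test above reduces to comparing two symlink targets: /proc/[pid]/ns/net links read as strings like net:[4026531993], and identical targets mean the configured path is the caller's own network namespace. A minimal standalone sketch of that comparison (the helper name is an assumption, and readlink on ns files only works on Linux):
package main

import (
	"fmt"
	"os"
)

// sameNetNS reports whether path refers to the calling process's network
// namespace by comparing readlink targets such as "net:[4026531993]".
func sameNetNS(path string) (bool, error) {
	self, err := os.Readlink("/proc/self/ns/net")
	if err != nil {
		return false, err
	}
	other, err := os.Readlink(path)
	if err != nil {
		return false, err
	}
	return self == other, nil
}

func main() {
	same, err := sameNetNS("/proc/self/ns/net")
	fmt.Println(same, err) // true <nil>
}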

View file

@ -0,0 +1,267 @@
package validate_test
import (
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
func TestValidate(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateWithInvalidRootfs(t *testing.T) {
dir := "rootfs"
os.Symlink("/var", dir)
defer os.Remove(dir)
config := &configs.Config{
Rootfs: dir,
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateNetworkWithoutNETNamespace(t *testing.T) {
network := &configs.Network{Type: "loopback"}
config := &configs.Config{
Rootfs: "/var",
Namespaces: []configs.Namespace{},
Networks: []*configs.Network{network},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateNetworkRoutesWithoutNETNamespace(t *testing.T) {
route := &configs.Route{Gateway: "255.255.255.0"}
config := &configs.Config{
Rootfs: "/var",
Namespaces: []configs.Namespace{},
Routes: []*configs.Route{route},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateHostname(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Hostname: "runc",
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUTS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateHostnameWithoutUTSNamespace(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Hostname: "runc",
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSecurityWithMaskPaths(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
MaskPaths: []string{"/proc/kcore"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWNS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateSecurityWithROPaths(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
ReadonlyPaths: []string{"/proc/sys"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWNS},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
func TestValidateSecurityWithoutNEWNS(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
MaskPaths: []string{"/proc/kcore"},
ReadonlyPaths: []string{"/proc/sys"},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateUsernamespace(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
config := &configs.Config{
Rootfs: "/var",
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("expected error to not occur %+v", err)
}
}
func TestValidateUsernamespaceWithoutUserNS(t *testing.T) {
uidMap := configs.IDMap{ContainerID: 123}
config := &configs.Config{
Rootfs: "/var",
UidMappings: []configs.IDMap{uidMap},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSysctl(t *testing.T) {
sysctl := map[string]string{
"fs.mqueue.ctl": "ctl",
"net.ctl": "ctl",
"kernel.ctl": "ctl",
}
for k, v := range sysctl {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{k: v},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
}
func TestValidateValidSysctl(t *testing.T) {
sysctl := map[string]string{
"fs.mqueue.ctl": "ctl",
"net.ctl": "ctl",
"kernel.msgmax": "ctl",
}
for k, v := range sysctl {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{k: v},
Namespaces: []configs.Namespace{
{
Type: configs.NEWNET,
},
{
Type: configs.NEWIPC,
},
},
}
validator := validate.New()
err := validator.Validate(config)
if err != nil {
t.Errorf("Expected error to not occur with {%s=%s} but got: %q", k, v, err)
}
}
}
func TestValidateSysctlWithSameNs(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{"net.ctl": "ctl"},
Namespaces: configs.Namespaces(
[]configs.Namespace{
{
Type: configs.NEWNET,
Path: "/proc/self/ns/net",
},
},
),
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}
func TestValidateSysctlWithoutNETNamespace(t *testing.T) {
config := &configs.Config{
Rootfs: "/var",
Sysctl: map[string]string{"net.ctl": "ctl"},
Namespaces: []configs.Namespace{},
}
validator := validate.New()
err := validator.Validate(config)
if err == nil {
t.Error("Expected error to occur but it was nil")
}
}

View file

@ -1,15 +0,0 @@
package libcontainer
import "io"
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriter
io.Closer
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
}

View file

@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View file

@ -1,145 +1,41 @@
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/opencontainers/runc/libcontainer/label"
"golang.org/x/sys/unix"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
func mountConsole(slavePath string) error {
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
func dupStdio(slavePath string) error {
fd, err := unix.Open(slavePath, unix.O_RDWR, 0)
if err != nil {
return err
return &os.PathError{
Op: "open",
Path: slavePath,
Err: err,
}
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup3(fd, i, 0); err != nil {
if err := unix.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := syscall.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

View file

@ -1,11 +0,0 @@
package libcontainer
import (
"errors"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View file

@ -1,30 +0,0 @@
package libcontainer
// NewConsole returns an initialized console that can be used within a container
func NewConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows pseudo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View file

@ -15,20 +15,16 @@ import (
type Status int
const (
// Created is the status that denotes the container exists but has not been run yet
// Created is the status that denotes the container exists but has not been run yet.
Created Status = iota
// Created is the status that denotes the container exists and is running.
// Running is the status that denotes the container exists and is running.
Running
// Pausing is the status that denotes the container exists, it is in the process of being paused.
Pausing
// Paused is the status that denotes the container exists, but all its processes are paused.
Paused
// Destroyed is the status that denotes the container does not exist.
Destroyed
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
)
func (s Status) String() string {
@ -41,8 +37,8 @@ func (s Status) String() string {
return "pausing"
case Paused:
return "paused"
case Destroyed:
return "destroyed"
case Stopped:
return "stopped"
default:
return "unknown"
}
@ -58,7 +54,7 @@ type BaseState struct {
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime string `json:"init_process_start"`
InitProcessStartTime uint64 `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
@ -79,14 +75,14 @@ type BaseContainer interface {
// Returns the current status of the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
State() (*State, error)
// Returns the current config of the container.
@ -95,7 +91,7 @@ type BaseContainer interface {
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
@ -105,7 +101,7 @@ type BaseContainer interface {
// Returns statistics for the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
@ -114,31 +110,57 @@ type BaseContainer interface {
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// Systemerror - System error.
// SystemError - System error.
Start(process *Process) (err error)
// Destroys the container after killing all running processes.
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container, if its in a valid state, after killing any
// remaining running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// Running containers must first be stopped using Signal(..).
// Paused containers must first be resumed using Resume(..).
//
// errors:
// Systemerror - System error.
// ContainerNotStopped - Container is still running,
// ContainerPaused - Container is paused,
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// If all is specified the signal is sent to all processes in the container
// including the initial process.
//
// errors:
// Systemerror - System error.
Signal(s os.Signal) error
// SystemError - System error.
Signal(s os.Signal, all bool) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

File diff suppressed because it is too large

View file

@ -0,0 +1,345 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
)
type mockCgroupManager struct {
pids []int
allPids []int
stats *cgroups.Stats
paths map[string]string
}
type mockIntelRdtManager struct {
stats *intelrdt.Stats
path string
}
func (m *mockCgroupManager) GetPids() ([]int, error) {
return m.pids, nil
}
func (m *mockCgroupManager) GetAllPids() ([]int, error) {
return m.allPids, nil
}
func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) {
return m.stats, nil
}
func (m *mockCgroupManager) Apply(pid int) error {
return nil
}
func (m *mockCgroupManager) Set(container *configs.Config) error {
return nil
}
func (m *mockCgroupManager) Destroy() error {
return nil
}
func (m *mockCgroupManager) GetPaths() map[string]string {
return m.paths
}
func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
return nil
}
func (m *mockIntelRdtManager) Apply(pid int) error {
return nil
}
func (m *mockIntelRdtManager) GetStats() (*intelrdt.Stats, error) {
return m.stats, nil
}
func (m *mockIntelRdtManager) Destroy() error {
return nil
}
func (m *mockIntelRdtManager) GetPath() string {
return m.path
}
func (m *mockIntelRdtManager) Set(container *configs.Config) error {
return nil
}
type mockProcess struct {
_pid int
started uint64
}
func (m *mockProcess) terminate() error {
return nil
}
func (m *mockProcess) pid() int {
return m._pid
}
func (m *mockProcess) startTime() (uint64, error) {
return m.started, nil
}
func (m *mockProcess) start() error {
return nil
}
func (m *mockProcess) wait() (*os.ProcessState, error) {
return nil, nil
}
func (m *mockProcess) signal(_ os.Signal) error {
return nil
}
func (m *mockProcess) externalDescriptors() []string {
return []string{}
}
func (m *mockProcess) setExternalDescriptors(newFds []string) {
}
func TestGetContainerPids(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{allPids: []int{1, 2, 3}},
}
pids, err := container.Processes()
if err != nil {
t.Fatal(err)
}
for i, expected := range []int{1, 2, 3} {
if pids[i] != expected {
t.Fatalf("expected pid %d but received %d", expected, pids[i])
}
}
}
func TestGetContainerStats(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f;1=f0",
},
},
}
stats, err := container.Stats()
if err != nil {
t.Fatal(err)
}
if stats.CgroupStats == nil {
t.Fatal("cgroup stats are nil")
}
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
}
if intelrdt.IsEnabled() {
if stats.IntelRdtStats == nil {
t.Fatal("intel rdt stats are nil")
}
if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" {
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
}
}
}
func TestGetContainerState(t *testing.T) {
var (
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid)
expectedIntelRdtPath = "/sys/fs/resctrl/myid"
)
container := &linuxContainer{
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET, Path: expectedNetworkPath},
{Type: configs.NEWUTS},
// emulate host for IPC
//{Type: configs.NEWIPC},
},
},
initProcess: &mockProcess{
_pid: pid,
started: 10,
},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
paths: map[string]string{
"memory": expectedMemoryPath,
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f0;1=f",
},
path: expectedIntelRdtPath,
},
}
container.state = &createdState{c: container}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != 10 {
t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime)
}
paths := state.CgroupPaths
if paths == nil {
t.Fatal("cgroup paths should not be nil")
}
if memPath := paths["memory"]; memPath != expectedMemoryPath {
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
}
if intelrdt.IsEnabled() {
intelRdtPath := state.IntelRdtPath
if intelRdtPath == "" {
t.Fatal("intel rdt path should not be empty")
}
if intelRdtPath != expectedIntelRdtPath {
t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
}
}
for _, ns := range container.config.Namespaces {
path := state.NamespacePaths[ns.Type]
if path == "" {
t.Fatalf("expected non nil namespace path for %s", ns.Type)
}
if ns.Type == configs.NEWNET {
if path != expectedNetworkPath {
t.Fatalf("expected path %q but received %q", expectedNetworkPath, path)
}
} else {
file := ""
switch ns.Type {
case configs.NEWNET:
file = "net"
case configs.NEWNS:
file = "mnt"
case configs.NEWPID:
file = "pid"
case configs.NEWIPC:
file = "ipc"
case configs.NEWUSER:
file = "user"
case configs.NEWUTS:
file = "uts"
}
expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file)
if expected != path {
t.Fatalf("expected path %q but received %q", expected, path)
}
}
}
}
func TestGetContainerStateAfterUpdate(t *testing.T) {
var (
pid = os.Getpid()
)
stat, err := system.Stat(pid)
if err != nil {
t.Fatal(err)
}
rootDir, err := ioutil.TempDir("", "TestGetContainerStateAfterUpdate")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(rootDir)
container := &linuxContainer{
root: rootDir,
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
},
Cgroups: &configs.Cgroup{
Resources: &configs.Resources{
Memory: 1024,
},
},
},
initProcess: &mockProcess{
_pid: pid,
started: stat.StartTime,
},
cgroupManager: &mockCgroupManager{},
}
container.state = &createdState{c: container}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != stat.StartTime {
t.Fatalf("expected process start time %d but received %d", stat.StartTime, state.InitProcessStartTime)
}
if state.Config.Cgroups.Resources.Memory != 1024 {
t.Fatalf("expected Memory to be 1024 but received %q", state.Config.Cgroups.Memory)
}
// Set initProcessStartTime so we fake to be running
container.initProcessStartTime = state.InitProcessStartTime
container.state = &runningState{c: container}
newConfig := container.Config()
newConfig.Cgroups.Resources.Memory = 2048
if err := container.Set(newConfig); err != nil {
t.Fatal(err)
}
state, err = container.State()
if err != nil {
t.Fatal(err)
}
if state.Config.Cgroups.Resources.Memory != 2048 {
t.Fatalf("expected Memory to be 2048 but received %q", state.Config.Cgroups.Memory)
}
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package libcontainer
// cgroup restoring strategy provided by criu
@ -25,13 +23,18 @@ type VethPairName struct {
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // directory for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow dumping and restoring shell jobs
FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow dumping to a criu page server
VethPairs []VethPairName // pass the veth pairs to criu on restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd string // fd for feedback when lazy server is ready
}
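As a quick orientation for readers of this diff, the sketch below shows how a caller might fill in a few of these options before checkpointing. It is only an illustration: it assumes a libcontainer.Container value with a Checkpoint(*CriuOpts) method obtained elsewhere, which is not part of this file.
// checkpointExample is a hypothetical sketch, not part of the vendored file:
// it leaves the container running and keeps established TCP connections.
func checkpointExample(container libcontainer.Container) error {
	opts := &libcontainer.CriuOpts{
		ImagesDirectory: "/var/lib/checkpoint/myid",      // where CRIU stores image files
		WorkDirectory:   "/var/lib/checkpoint/myid/work", // logs, pidfiles, stats
		LeaveRunning:    true,                            // keep the container running after checkpoint
		TcpEstablished:  true,                            // checkpoint established TCP connections
	}
	return container.Checkpoint(opts)
}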

View file

@ -1,6 +0,0 @@
package libcontainer
// TODO Windows: This can ultimately be entirely factored out as criu is
// a Unix concept not relevant on Windows.
type CriuOpts struct {
}

View file

@ -0,0 +1,2 @@
gen: criurpc.proto
protoc --go_out=. criurpc.proto

View file

@ -12,6 +12,7 @@ It has these top-level messages:
CriuPageServerInfo
CriuVethPair
ExtMountMap
JoinNamespace
InheritFd
CgroupRoot
UnixSk
@ -22,21 +23,30 @@ It has these top-level messages:
CriuFeatures
CriuReq
CriuResp
CriuVersion
*/
package criurpc
import proto "github.com/golang/protobuf/proto"
import fmt "fmt"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
type CriuCgMode int32
const (
CriuCgMode_IGNORE CriuCgMode = 0
CriuCgMode_NONE CriuCgMode = 1
CriuCgMode_CG_NONE CriuCgMode = 1
CriuCgMode_PROPS CriuCgMode = 2
CriuCgMode_SOFT CriuCgMode = 3
CriuCgMode_FULL CriuCgMode = 4
@ -46,7 +56,7 @@ const (
var CriuCgMode_name = map[int32]string{
0: "IGNORE",
1: "NONE",
1: "CG_NONE",
2: "PROPS",
3: "SOFT",
4: "FULL",
@ -55,7 +65,7 @@ var CriuCgMode_name = map[int32]string{
}
var CriuCgMode_value = map[string]int32{
"IGNORE": 0,
"NONE": 1,
"CG_NONE": 1,
"PROPS": 2,
"SOFT": 3,
"FULL": 4,
@ -79,6 +89,7 @@ func (x *CriuCgMode) UnmarshalJSON(data []byte) error {
*x = CriuCgMode(value)
return nil
}
func (CriuCgMode) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
type CriuReqType int32
@ -93,19 +104,21 @@ const (
CriuReqType_CPUINFO_DUMP CriuReqType = 7
CriuReqType_CPUINFO_CHECK CriuReqType = 8
CriuReqType_FEATURE_CHECK CriuReqType = 9
CriuReqType_VERSION CriuReqType = 10
)
var CriuReqType_name = map[int32]string{
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
10: "VERSION",
}
var CriuReqType_value = map[string]int32{
"EMPTY": 0,
@ -118,6 +131,7 @@ var CriuReqType_value = map[string]int32{
"CPUINFO_DUMP": 7,
"CPUINFO_CHECK": 8,
"FEATURE_CHECK": 9,
"VERSION": 10,
}
func (x CriuReqType) Enum() *CriuReqType {
@ -136,6 +150,7 @@ func (x *CriuReqType) UnmarshalJSON(data []byte) error {
*x = CriuReqType(value)
return nil
}
func (CriuReqType) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
type CriuPageServerInfo struct {
Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"`
@ -145,9 +160,10 @@ type CriuPageServerInfo struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
func (m *CriuPageServerInfo) GetAddress() string {
if m != nil && m.Address != nil {
@ -178,14 +194,15 @@ func (m *CriuPageServerInfo) GetFd() int32 {
}
type CriuVethPair struct {
IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"`
IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (*CriuVethPair) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
func (m *CriuVethPair) GetIfIn() string {
if m != nil && m.IfIn != nil {
@ -207,9 +224,10 @@ type ExtMountMap struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (*ExtMountMap) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} }
func (m *ExtMountMap) GetKey() string {
if m != nil && m.Key != nil {
@ -225,15 +243,49 @@ func (m *ExtMountMap) GetVal() string {
return ""
}
type JoinNamespace struct {
Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"`
NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"`
ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *JoinNamespace) Reset() { *m = JoinNamespace{} }
func (m *JoinNamespace) String() string { return proto.CompactTextString(m) }
func (*JoinNamespace) ProtoMessage() {}
func (*JoinNamespace) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} }
func (m *JoinNamespace) GetNs() string {
if m != nil && m.Ns != nil {
return *m.Ns
}
return ""
}
func (m *JoinNamespace) GetNsFile() string {
if m != nil && m.NsFile != nil {
return *m.NsFile
}
return ""
}
func (m *JoinNamespace) GetExtraOpt() string {
if m != nil && m.ExtraOpt != nil {
return *m.ExtraOpt
}
return ""
}
type InheritFd struct {
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (*InheritFd) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} }
func (m *InheritFd) GetKey() string {
if m != nil && m.Key != nil {
@ -255,9 +307,10 @@ type CgroupRoot struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (*CgroupRoot) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} }
func (m *CgroupRoot) GetCtrl() string {
if m != nil && m.Ctrl != nil {
@ -278,9 +331,10 @@ type UnixSk struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (*UnixSk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} }
func (m *UnixSk) GetInode() uint32 {
if m != nil && m.Inode != nil {
@ -290,51 +344,62 @@ func (m *UnixSk) GetInode() uint32 {
}
type CriuOpts struct {
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns" json:"empty_ns,omitempty"`
NoSeccomp *bool `protobuf:"varint,39,opt,name=no_seccomp" json:"no_seccomp,omitempty"`
XXX_unrecognized []byte `json:"-"`
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"`
JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"`
CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"`
CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"`
CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"`
FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"`
Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"`
TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"`
WeakSysctls *bool `protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"`
LazyPages *bool `protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"`
OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (*CriuOpts) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} }
const Default_CriuOpts_LogLevel int32 = 2
const Default_CriuOpts_CpuCap uint32 = 4294967295
@ -606,9 +671,79 @@ func (m *CriuOpts) GetEmptyNs() uint32 {
return 0
}
func (m *CriuOpts) GetNoSeccomp() bool {
if m != nil && m.NoSeccomp != nil {
return *m.NoSeccomp
func (m *CriuOpts) GetJoinNs() []*JoinNamespace {
if m != nil {
return m.JoinNs
}
return nil
}
func (m *CriuOpts) GetCgroupProps() string {
if m != nil && m.CgroupProps != nil {
return *m.CgroupProps
}
return ""
}
func (m *CriuOpts) GetCgroupPropsFile() string {
if m != nil && m.CgroupPropsFile != nil {
return *m.CgroupPropsFile
}
return ""
}
func (m *CriuOpts) GetCgroupDumpController() []string {
if m != nil {
return m.CgroupDumpController
}
return nil
}
func (m *CriuOpts) GetFreezeCgroup() string {
if m != nil && m.FreezeCgroup != nil {
return *m.FreezeCgroup
}
return ""
}
func (m *CriuOpts) GetTimeout() uint32 {
if m != nil && m.Timeout != nil {
return *m.Timeout
}
return 0
}
func (m *CriuOpts) GetTcpSkipInFlight() bool {
if m != nil && m.TcpSkipInFlight != nil {
return *m.TcpSkipInFlight
}
return false
}
func (m *CriuOpts) GetWeakSysctls() bool {
if m != nil && m.WeakSysctls != nil {
return *m.WeakSysctls
}
return false
}
func (m *CriuOpts) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
func (m *CriuOpts) GetStatusFd() int32 {
if m != nil && m.StatusFd != nil {
return *m.StatusFd
}
return 0
}
func (m *CriuOpts) GetOrphanPtsMaster() bool {
if m != nil && m.OrphanPtsMaster != nil {
return *m.OrphanPtsMaster
}
return false
}
@ -618,9 +753,10 @@ type CriuDumpResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (*CriuDumpResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} }
func (m *CriuDumpResp) GetRestored() bool {
if m != nil && m.Restored != nil {
@ -634,9 +770,10 @@ type CriuRestoreResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (*CriuRestoreResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} }
func (m *CriuRestoreResp) GetPid() int32 {
if m != nil && m.Pid != nil {
@ -651,9 +788,10 @@ type CriuNotify struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (*CriuNotify) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} }
func (m *CriuNotify) GetScript() string {
if m != nil && m.Script != nil {
@ -673,13 +811,15 @@ func (m *CriuNotify) GetPid() int32 {
// List of features which can be queried via
// CRIU_REQ_TYPE__FEATURE_CHECK
type CriuFeatures struct {
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track" json:"mem_track,omitempty"`
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"`
LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (*CriuFeatures) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} }
func (m *CriuFeatures) GetMemTrack() bool {
if m != nil && m.MemTrack != nil {
@ -688,15 +828,22 @@ func (m *CriuFeatures) GetMemTrack() bool {
return false
}
func (m *CriuFeatures) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
type CriuReq struct {
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"`
//
// When set, the service won't close the connection but
// will wait for more requests to appear. This does not work
// for all request types.
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"`
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"`
//
// 'features' can be used to query which features
// are supported by the installed criu/kernel
@ -705,9 +852,10 @@ type CriuReq struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (*CriuReq) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} }
func (m *CriuReq) GetType() CriuReqType {
if m != nil && m.Type != nil {
@ -751,14 +899,17 @@ type CriuResp struct {
Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"`
Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"`
Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"`
CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"`
Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (*CriuResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} }
func (m *CriuResp) GetType() CriuReqType {
if m != nil && m.Type != nil {
@ -816,7 +967,212 @@ func (m *CriuResp) GetFeatures() *CriuFeatures {
return nil
}
func (m *CriuResp) GetCrErrmsg() string {
if m != nil && m.CrErrmsg != nil {
return *m.CrErrmsg
}
return ""
}
func (m *CriuResp) GetVersion() *CriuVersion {
if m != nil {
return m.Version
}
return nil
}
// Answer for criu_req_type.VERSION requests
type CriuVersion struct {
Major *int32 `protobuf:"varint,1,req,name=major" json:"major,omitempty"`
Minor *int32 `protobuf:"varint,2,req,name=minor" json:"minor,omitempty"`
Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"`
Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"`
Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"`
Name *string `protobuf:"bytes,6,opt,name=name" json:"name,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVersion) Reset() { *m = CriuVersion{} }
func (m *CriuVersion) String() string { return proto.CompactTextString(m) }
func (*CriuVersion) ProtoMessage() {}
func (*CriuVersion) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} }
func (m *CriuVersion) GetMajor() int32 {
if m != nil && m.Major != nil {
return *m.Major
}
return 0
}
func (m *CriuVersion) GetMinor() int32 {
if m != nil && m.Minor != nil {
return *m.Minor
}
return 0
}
func (m *CriuVersion) GetGitid() string {
if m != nil && m.Gitid != nil {
return *m.Gitid
}
return ""
}
func (m *CriuVersion) GetSublevel() int32 {
if m != nil && m.Sublevel != nil {
return *m.Sublevel
}
return 0
}
func (m *CriuVersion) GetExtra() int32 {
if m != nil && m.Extra != nil {
return *m.Extra
}
return 0
}
func (m *CriuVersion) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func init() {
proto.RegisterType((*CriuPageServerInfo)(nil), "criu_page_server_info")
proto.RegisterType((*CriuVethPair)(nil), "criu_veth_pair")
proto.RegisterType((*ExtMountMap)(nil), "ext_mount_map")
proto.RegisterType((*JoinNamespace)(nil), "join_namespace")
proto.RegisterType((*InheritFd)(nil), "inherit_fd")
proto.RegisterType((*CgroupRoot)(nil), "cgroup_root")
proto.RegisterType((*UnixSk)(nil), "unix_sk")
proto.RegisterType((*CriuOpts)(nil), "criu_opts")
proto.RegisterType((*CriuDumpResp)(nil), "criu_dump_resp")
proto.RegisterType((*CriuRestoreResp)(nil), "criu_restore_resp")
proto.RegisterType((*CriuNotify)(nil), "criu_notify")
proto.RegisterType((*CriuFeatures)(nil), "criu_features")
proto.RegisterType((*CriuReq)(nil), "criu_req")
proto.RegisterType((*CriuResp)(nil), "criu_resp")
proto.RegisterType((*CriuVersion)(nil), "criu_version")
proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value)
proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value)
}
func init() { proto.RegisterFile("criurpc.proto", fileDescriptor0) }
var fileDescriptor0 = []byte{
// 1781 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0xdd, 0x72, 0x5b, 0xb7,
0x11, 0x0e, 0x29, 0xfe, 0x1c, 0x82, 0x3f, 0xa6, 0x10, 0xdb, 0x81, 0x93, 0xda, 0x62, 0xe8, 0x28,
0x51, 0x15, 0x97, 0x4d, 0x58, 0x3b, 0xae, 0x33, 0xed, 0x85, 0x47, 0x22, 0x5d, 0x36, 0x92, 0xc8,
0x01, 0x25, 0xcf, 0xe4, 0x0a, 0x73, 0x74, 0x0e, 0x48, 0xc1, 0x3c, 0x7f, 0x05, 0x40, 0x45, 0xf2,
0x83, 0xf4, 0x29, 0xfa, 0x0c, 0x7d, 0x84, 0xbe, 0x4e, 0x6f, 0x3b, 0xbb, 0x00, 0x65, 0x29, 0xc9,
0xb4, 0xbd, 0xc3, 0x7e, 0x58, 0x00, 0xbb, 0xfb, 0xed, 0x0f, 0x48, 0x3b, 0xd2, 0x6a, 0xad, 0x8b,
0x68, 0x50, 0xe8, 0xdc, 0xe6, 0xfd, 0x25, 0x79, 0x00, 0x80, 0x28, 0xc2, 0xa5, 0x14, 0x46, 0xea,
0x4b, 0xa9, 0x85, 0xca, 0x16, 0x39, 0x65, 0xa4, 0x1e, 0xc6, 0xb1, 0x96, 0xc6, 0xb0, 0x52, 0xaf,
0xb4, 0xd7, 0xe0, 0x1b, 0x91, 0x52, 0x52, 0x29, 0x72, 0x6d, 0x59, 0xb9, 0x57, 0xda, 0xab, 0x72,
0x5c, 0xd3, 0x2e, 0xd9, 0x2a, 0x54, 0xcc, 0xb6, 0x10, 0x82, 0x25, 0xed, 0x90, 0xf2, 0x22, 0x66,
0x15, 0x04, 0xca, 0x8b, 0xb8, 0xff, 0x27, 0xd2, 0xc1, 0x87, 0x2e, 0xa5, 0xbd, 0x10, 0x45, 0xa8,
0x34, 0xfd, 0x98, 0x54, 0xd5, 0x42, 0xa8, 0x8c, 0x95, 0x7a, 0xe5, 0xbd, 0x06, 0xaf, 0xa8, 0xc5,
0x24, 0xa3, 0x0f, 0x48, 0x4d, 0x2d, 0x44, 0xbe, 0x86, 0xeb, 0x01, 0xad, 0xaa, 0xc5, 0x74, 0x6d,
0xfb, 0x7f, 0x20, 0x6d, 0x79, 0x65, 0x45, 0x9a, 0xaf, 0x33, 0x2b, 0xd2, 0xb0, 0x80, 0x07, 0x57,
0xf2, 0xda, 0x1f, 0x85, 0x25, 0x20, 0x97, 0x61, 0xe2, 0x8f, 0xc1, 0xb2, 0xff, 0x96, 0x74, 0xde,
0xe5, 0x2a, 0x13, 0x59, 0x98, 0x4a, 0x53, 0x84, 0x91, 0x04, 0xa3, 0x32, 0xe3, 0x0f, 0x95, 0x33,
0x43, 0x3f, 0x21, 0xf5, 0xcc, 0x88, 0x85, 0x4a, 0xa4, 0x3f, 0x57, 0xcb, 0xcc, 0x58, 0x25, 0x92,
0x7e, 0x46, 0x1a, 0xf2, 0xca, 0xea, 0x50, 0xe4, 0x85, 0x45, 0xaf, 0x1a, 0x3c, 0x40, 0x60, 0x5a,
0xd8, 0xfe, 0x80, 0x10, 0x95, 0x5d, 0x48, 0xad, 0xac, 0x58, 0xc4, 0xbf, 0x62, 0x89, 0x73, 0x1d,
0x2e, 0x74, 0xae, 0xbf, 0x20, 0xcd, 0x68, 0xa9, 0xf3, 0x75, 0x21, 0x74, 0x9e, 0x5b, 0x88, 0x5f,
0x64, 0x75, 0xe2, 0xc3, 0x8a, 0x6b, 0x8c, 0x69, 0x68, 0x2f, 0xbc, 0x15, 0xb8, 0xee, 0xef, 0x90,
0xfa, 0x3a, 0x53, 0x57, 0xc2, 0xac, 0xe8, 0x7d, 0x52, 0x55, 0x59, 0x1e, 0x4b, 0x7c, 0xa5, 0xcd,
0x9d, 0xd0, 0xff, 0x57, 0x9b, 0x34, 0x30, 0xa6, 0x79, 0x61, 0x0d, 0xed, 0x93, 0xb6, 0x4a, 0xc3,
0xa5, 0x34, 0x22, 0x56, 0x5a, 0x2c, 0x62, 0xd4, 0xad, 0xf2, 0xa6, 0x03, 0x0f, 0x95, 0x1e, 0xc7,
0x1b, 0x9a, 0xca, 0x1f, 0x68, 0x7a, 0x4a, 0xda, 0x89, 0x0c, 0x2f, 0xa5, 0xd0, 0xeb, 0x2c, 0x53,
0xd9, 0x12, 0x9d, 0x0d, 0x78, 0x0b, 0x41, 0xee, 0x30, 0xfa, 0x84, 0x34, 0x21, 0xfa, 0xde, 0x1a,
0x24, 0x35, 0xe0, 0x10, 0xa0, 0xb3, 0x4c, 0x5d, 0xcd, 0x57, 0xf4, 0x2b, 0x72, 0xcf, 0x46, 0x85,
0x90, 0xc6, 0x86, 0xe7, 0x89, 0x32, 0x17, 0x32, 0x66, 0x55, 0xd4, 0xe9, 0xd8, 0xa8, 0x18, 0x7d,
0x40, 0x41, 0x51, 0x5e, 0x86, 0x46, 0x5d, 0x4a, 0x11, 0xcb, 0x4b, 0x15, 0x49, 0xc3, 0x6a, 0x4e,
0xd1, 0xc3, 0x87, 0x0e, 0x85, 0xf8, 0x9b, 0x0b, 0x99, 0x24, 0xe2, 0x5d, 0x7e, 0xce, 0xea, 0xa8,
0x12, 0x20, 0xf0, 0xd7, 0xfc, 0x9c, 0x3e, 0x26, 0x04, 0x28, 0x13, 0x49, 0x1e, 0xad, 0x0c, 0x0b,
0x9c, 0x35, 0x80, 0x1c, 0x01, 0x40, 0x9f, 0x90, 0x46, 0x92, 0x2f, 0x45, 0x22, 0x2f, 0x65, 0xc2,
0x1a, 0xe0, 0xea, 0xf7, 0xa5, 0x21, 0x0f, 0x92, 0x7c, 0x79, 0x04, 0x10, 0x7d, 0x44, 0x60, 0xed,
0x58, 0x27, 0x2e, 0xb5, 0x93, 0x7c, 0x89, 0xb4, 0x7f, 0x49, 0xca, 0x85, 0x61, 0xcd, 0x5e, 0x69,
0xaf, 0x39, 0x7c, 0x38, 0xf8, 0xd5, 0xc2, 0xe0, 0xe5, 0xc2, 0xd0, 0x5d, 0xd2, 0xc9, 0x72, 0xab,
0x16, 0xd7, 0xc2, 0x44, 0x5a, 0x15, 0xd6, 0xb0, 0x16, 0x5a, 0xd1, 0x76, 0xe8, 0xdc, 0x81, 0xc0,
0x2a, 0x30, 0xce, 0xda, 0x8e, 0x69, 0x64, 0xff, 0x31, 0x21, 0x45, 0xa8, 0x65, 0x66, 0x85, 0x4a,
0x97, 0xac, 0x83, 0x3b, 0x0d, 0x87, 0x4c, 0xd2, 0x25, 0x38, 0x6e, 0x75, 0x18, 0xad, 0x44, 0x2a,
0x53, 0x76, 0xcf, 0x39, 0x8e, 0xc0, 0xb1, 0x4c, 0xe1, 0x6c, 0xb8, 0xb6, 0xb9, 0x88, 0x65, 0xbc,
0x2e, 0x58, 0xd7, 0x39, 0x0e, 0xc8, 0x21, 0x00, 0x40, 0xd3, 0x4f, 0xb9, 0x5e, 0x6d, 0xf8, 0xdf,
0x46, 0x96, 0x1b, 0x00, 0x39, 0xf6, 0x1f, 0x13, 0x92, 0xa8, 0x6c, 0x25, 0xb4, 0x4c, 0xc3, 0x82,
0x51, 0x77, 0x1c, 0x10, 0x0e, 0x00, 0xdd, 0x25, 0x55, 0x28, 0x4e, 0xc3, 0x3e, 0xee, 0x6d, 0xed,
0x35, 0x87, 0xf7, 0x06, 0x77, 0xeb, 0x95, 0xbb, 0x5d, 0xfa, 0x94, 0xd4, 0xa3, 0x62, 0x2d, 0xa2,
0xb0, 0x60, 0xf7, 0x7b, 0xa5, 0xbd, 0xf6, 0xf7, 0xe4, 0xf9, 0xf0, 0xd5, 0xf3, 0x57, 0xdf, 0xbd,
0x1c, 0xbe, 0x7a, 0xc1, 0x6b, 0x51, 0xb1, 0x3e, 0x08, 0x0b, 0xba, 0x43, 0x9a, 0x8b, 0x5c, 0x47,
0x52, 0x28, 0x0d, 0x6f, 0x3d, 0xc0, 0xb7, 0x08, 0x42, 0x13, 0x40, 0x80, 0x04, 0x79, 0x25, 0x23,
0x11, 0xa5, 0x31, 0x7b, 0xd8, 0xdb, 0x02, 0x12, 0x40, 0x3e, 0x48, 0x21, 0x49, 0xea, 0x58, 0xeb,
0x99, 0x65, 0x9f, 0xa0, 0x25, 0x9d, 0xc1, 0x9d, 0xda, 0xe7, 0x35, 0x79, 0x65, 0x8f, 0x33, 0x0b,
0x2c, 0xa4, 0x61, 0x06, 0xfc, 0xb8, 0xf2, 0x32, 0x8c, 0x39, 0x16, 0x1c, 0x7a, 0xe0, 0x40, 0xba,
0x4b, 0xea, 0xd1, 0x12, 0x4b, 0x8f, 0x3d, 0xc2, 0xfb, 0x5a, 0x83, 0x5b, 0xe5, 0xc8, 0x6b, 0xd1,
0x92, 0x03, 0x31, 0x3b, 0xa4, 0xa9, 0x8d, 0x15, 0x46, 0x9d, 0x27, 0x50, 0x07, 0x9f, 0x3a, 0x93,
0xb5, 0xb1, 0x73, 0x87, 0xd0, 0xfd, 0xdb, 0x65, 0xcf, 0x3e, 0xc3, 0xab, 0x9a, 0x83, 0x0f, 0x10,
0x6f, 0xf8, 0xf5, 0x38, 0xa6, 0x3d, 0xd2, 0x42, 0xa6, 0x36, 0x8e, 0xfc, 0xc6, 0xdd, 0x06, 0xd8,
0xc8, 0x19, 0xbf, 0xe3, 0x6a, 0xca, 0x5c, 0x84, 0x1a, 0x9e, 0x7b, 0xec, 0x14, 0xe4, 0x95, 0x9d,
0x3b, 0x64, 0xa3, 0x90, 0x86, 0xc6, 0x4a, 0x6d, 0xd8, 0x93, 0x1b, 0x85, 0x63, 0x87, 0x40, 0x08,
0xcd, 0x4a, 0x15, 0x78, 0xff, 0x8e, 0x0b, 0x21, 0xc8, 0x70, 0x39, 0xb4, 0xaf, 0x2c, 0x3c, 0x4f,
0xa4, 0x58, 0x18, 0xd6, 0xc3, 0xbd, 0xc0, 0x01, 0x63, 0x43, 0xf7, 0x48, 0xd3, 0x57, 0xb2, 0x50,
0x59, 0xce, 0x3e, 0x47, 0x47, 0x82, 0x81, 0xc7, 0x78, 0x63, 0x8d, 0x45, 0x3d, 0xc9, 0x72, 0xfa,
0x67, 0xf2, 0xf1, 0xdd, 0x00, 0x8b, 0x14, 0x9a, 0x50, 0xbf, 0x57, 0xda, 0xeb, 0x0c, 0xdb, 0x2e,
0x3f, 0xa2, 0x25, 0x82, 0x7c, 0xfb, 0x4e, 0xd0, 0x8f, 0xf3, 0x58, 0xc2, 0x43, 0xcb, 0x8b, 0xdc,
0x58, 0x91, 0xa8, 0x54, 0x59, 0xf6, 0x14, 0xb3, 0xa5, 0xfe, 0xed, 0x37, 0xcf, 0xff, 0xf8, 0xe2,
0xe5, 0x77, 0x9c, 0xe0, 0xde, 0x11, 0x6c, 0xd1, 0x3d, 0xd2, 0xc5, 0x44, 0x11, 0x26, 0x0a, 0x33,
0x01, 0xdd, 0xcf, 0xb0, 0x2f, 0xd0, 0xec, 0x0e, 0xe2, 0xf3, 0x28, 0xcc, 0x66, 0x80, 0xd2, 0x4f,
0x21, 0x6f, 0xac, 0xd4, 0x59, 0x98, 0xb0, 0x5d, 0xef, 0x98, 0x97, 0x31, 0xa7, 0xd2, 0xc2, 0x5e,
0x8b, 0xcc, 0xb0, 0x2f, 0xe1, 0x31, 0x5e, 0x47, 0xf9, 0x04, 0x7c, 0xae, 0xbb, 0x51, 0x60, 0xd8,
0x57, 0x3e, 0xbb, 0xef, 0x8e, 0x06, 0x5e, 0x03, 0xf9, 0xc4, 0xd0, 0xcf, 0x49, 0xcb, 0x67, 0x47,
0xa1, 0xf3, 0xc2, 0xb0, 0xdf, 0x62, 0x85, 0xfa, 0x06, 0x3e, 0x03, 0x88, 0xee, 0x93, 0xed, 0xdb,
0x2a, 0xae, 0x93, 0xec, 0xa3, 0xde, 0xbd, 0x5b, 0x7a, 0xd8, 0x51, 0x9e, 0x93, 0x87, 0x5e, 0x37,
0x5e, 0xa7, 0x85, 0x88, 0xf2, 0xcc, 0xea, 0x3c, 0x49, 0xa4, 0x66, 0x5f, 0xa3, 0xf5, 0xf7, 0xdd,
0xee, 0xe1, 0x3a, 0x2d, 0x0e, 0x6e, 0xf6, 0xa0, 0x2b, 0x2f, 0xb4, 0x94, 0xef, 0x37, 0x81, 0x67,
0xcf, 0xf0, 0xf6, 0x96, 0x03, 0x5d, 0x8c, 0x61, 0x42, 0x5b, 0x95, 0x4a, 0x98, 0x95, 0xbf, 0x73,
0xde, 0x7a, 0x91, 0x7e, 0x4d, 0x28, 0xf4, 0x63, 0xcc, 0x0e, 0x95, 0x89, 0x45, 0xa2, 0x96, 0x17,
0x96, 0x0d, 0x30, 0x83, 0xa0, 0x53, 0xcf, 0x57, 0xaa, 0x98, 0x64, 0x63, 0x84, 0xc1, 0xe1, 0x9f,
0x64, 0xb8, 0x12, 0xe6, 0xda, 0x44, 0x36, 0x31, 0xec, 0xf7, 0xa8, 0xd6, 0x04, 0x6c, 0xee, 0x20,
0x6c, 0x1c, 0xe1, 0xfb, 0x6b, 0xec, 0x85, 0x86, 0x7d, 0xe3, 0x1b, 0x47, 0xf8, 0xfe, 0x7a, 0x06,
0x00, 0x36, 0x6b, 0x1b, 0xda, 0xb5, 0x81, 0xba, 0xf8, 0x16, 0xbb, 0x4e, 0xe0, 0x80, 0x71, 0x0c,
0xc1, 0xca, 0x75, 0x71, 0x01, 0xb4, 0x5a, 0xe3, 0xb3, 0x99, 0x0d, 0x9d, 0x29, 0x6e, 0x63, 0x66,
0x8d, 0x4b, 0xe9, 0xfe, 0x33, 0xff, 0x47, 0xc0, 0x50, 0x69, 0x69, 0x0a, 0xa0, 0x5b, 0x4b, 0x63,
0x73, 0x2d, 0x63, 0x9c, 0x97, 0x01, 0xbf, 0x91, 0xfb, 0xbb, 0x64, 0x1b, 0xb5, 0x3d, 0xe0, 0x0e,
0xf8, 0x09, 0xe7, 0x66, 0x1f, 0x2c, 0xfb, 0x2f, 0x49, 0x13, 0xd5, 0x5c, 0x6b, 0xa6, 0x0f, 0x49,
0xcd, 0xf5, 0x6c, 0x3f, 0x7f, 0xbd, 0xf4, 0xcb, 0xd1, 0xd8, 0xff, 0xc1, 0xfd, 0x95, 0xc4, 0x42,
0x86, 0x76, 0xad, 0x9d, 0x9f, 0xa9, 0x4c, 0x05, 0xb6, 0xe3, 0x8d, 0x35, 0xa9, 0x4c, 0x4f, 0x41,
0xfe, 0x59, 0x8c, 0xca, 0x3f, 0x8b, 0x51, 0xff, 0x9f, 0x25, 0x12, 0x78, 0x6b, 0xff, 0x46, 0xfb,
0xa4, 0x62, 0xaf, 0x0b, 0x37, 0xcd, 0x3b, 0xc3, 0xce, 0x60, 0xb3, 0x21, 0x00, 0xe5, 0xb8, 0x47,
0x9f, 0x90, 0x0a, 0x8c, 0x75, 0xbc, 0xa9, 0x39, 0x24, 0x83, 0x9b, 0x41, 0xcf, 0x11, 0xbf, 0x3d,
0x82, 0xd6, 0x51, 0x04, 0xdf, 0xb4, 0xad, 0x3b, 0x23, 0xc8, 0x81, 0x60, 0xf3, 0x4a, 0xca, 0x42,
0xe4, 0x85, 0xcc, 0xfc, 0xe0, 0x0e, 0x00, 0x98, 0x16, 0x32, 0xa3, 0xfb, 0x24, 0xd8, 0x38, 0x87,
0x03, 0xbb, 0xb9, 0xb1, 0x65, 0x83, 0xf2, 0x9b, 0xfd, 0xfe, 0xbf, 0xcb, 0xfe, 0xb3, 0x81, 0x61,
0xfe, 0x7f, 0x3c, 0x60, 0xa4, 0xbe, 0x31, 0x0d, 0xbe, 0x35, 0x01, 0xdf, 0x88, 0xf4, 0x29, 0xa9,
0x00, 0xc5, 0x68, 0xf1, 0xcd, 0xa0, 0xb9, 0x21, 0x9d, 0xe3, 0x26, 0x7d, 0x46, 0xea, 0x9e, 0x59,
0xb4, 0xbb, 0x39, 0xa4, 0x83, 0x5f, 0xd0, 0xcd, 0x37, 0x2a, 0xf4, 0x0b, 0x52, 0x73, 0x8e, 0x7b,
0x47, 0x5a, 0x83, 0x5b, 0xa4, 0x73, 0xbf, 0xe7, 0xe7, 0x7b, 0xed, 0x7f, 0xce, 0xf7, 0x47, 0x40,
0x96, 0x90, 0x5a, 0x67, 0x39, 0xfe, 0x3e, 0xaa, 0xbc, 0x1e, 0xe9, 0x11, 0x88, 0x77, 0x62, 0x16,
0xfc, 0xf7, 0x98, 0x41, 0xf0, 0xdd, 0x35, 0xa9, 0x59, 0xe2, 0x4f, 0xa4, 0xc1, 0x03, 0xbc, 0x27,
0x35, 0x4b, 0x18, 0x73, 0x97, 0x52, 0x1b, 0x95, 0x67, 0xf8, 0x0b, 0x69, 0x6e, 0x1a, 0xaa, 0x07,
0xf9, 0x66, 0xb7, 0xff, 0xf7, 0x12, 0x69, 0xdd, 0xde, 0x81, 0xdf, 0x60, 0x1a, 0xbe, 0xcb, 0xb5,
0xcf, 0x72, 0x27, 0x20, 0xaa, 0xb2, 0x5c, 0xfb, 0x8f, 0xa7, 0x13, 0x00, 0x5d, 0x2a, 0xeb, 0xbf,
0xe6, 0x0d, 0xee, 0x04, 0x28, 0x2b, 0xb3, 0x3e, 0x77, 0x3f, 0xa4, 0x8a, 0x2f, 0x58, 0x2f, 0xc3,
0x09, 0xfc, 0xe9, 0x62, 0x20, 0xab, 0xdc, 0x09, 0xf0, 0x95, 0x81, 0x5e, 0x89, 0xb1, 0x6b, 0x70,
0x5c, 0xef, 0x0b, 0x6f, 0x97, 0x1f, 0x01, 0x94, 0x90, 0xda, 0xe4, 0xcd, 0xc9, 0x94, 0x8f, 0xba,
0x1f, 0xd1, 0x26, 0xa9, 0x1f, 0xbc, 0x11, 0x27, 0xd3, 0x93, 0x51, 0xb7, 0x44, 0x1b, 0xa4, 0x3a,
0xe3, 0xd3, 0xd9, 0xbc, 0x5b, 0xa6, 0x01, 0xa9, 0xcc, 0xa7, 0xe3, 0xd3, 0xee, 0x16, 0xac, 0xc6,
0x67, 0x47, 0x47, 0xdd, 0x0a, 0x9c, 0x9b, 0x9f, 0xf2, 0xc9, 0xc1, 0x69, 0xb7, 0x0a, 0xe7, 0x0e,
0x47, 0xe3, 0xd7, 0x67, 0x47, 0xa7, 0xdd, 0xda, 0xfe, 0x3f, 0x4a, 0xbe, 0x04, 0x37, 0x99, 0x05,
0x37, 0x8d, 0x8e, 0x67, 0xa7, 0x3f, 0x76, 0x3f, 0x82, 0xf3, 0x87, 0x67, 0xc7, 0xb3, 0x6e, 0x09,
0xce, 0xf0, 0xd1, 0xfc, 0x14, 0x1e, 0x2e, 0x83, 0xc6, 0xc1, 0x5f, 0x46, 0x07, 0x3f, 0x74, 0xb7,
0x68, 0x8b, 0x04, 0x33, 0x3e, 0x12, 0xa8, 0x55, 0xa1, 0xf7, 0x48, 0x73, 0xf6, 0xfa, 0xcd, 0x48,
0xcc, 0x47, 0xfc, 0xed, 0x88, 0x77, 0xab, 0xf0, 0xec, 0xc9, 0xf4, 0x74, 0x32, 0xfe, 0xb1, 0x5b,
0xa3, 0x5d, 0xd2, 0x3a, 0x98, 0x9d, 0x4d, 0x4e, 0xc6, 0x53, 0xa7, 0x5e, 0xa7, 0xdb, 0xa4, 0xbd,
0x41, 0xdc, 0x7d, 0x01, 0x40, 0xe3, 0xd1, 0xeb, 0xd3, 0x33, 0x3e, 0xf2, 0x50, 0x03, 0x9e, 0x7e,
0x3b, 0xe2, 0xf3, 0xc9, 0xf4, 0xa4, 0x4b, 0xfe, 0x13, 0x00, 0x00, 0xff, 0xff, 0x5f, 0x2a, 0xaf,
0x49, 0x5b, 0x0d, 0x00, 0x00,
}
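The generated message and enum types above are consumed by runc's checkpoint/restore code. As a hedged illustration of how they fit together, the fragment below builds and serializes a FEATURE_CHECK request from an external package; it assumes imports of this criurpc package and github.com/golang/protobuf/proto, and writing the bytes to CRIU's service socket is left out.
// featureCheckRequest is a hypothetical sketch: it asks a criu service which
// of the listed features it supports.
func featureCheckRequest() ([]byte, error) {
	req := &criurpc.CriuReq{
		Type: criurpc.CriuReqType_FEATURE_CHECK.Enum(),
		Features: &criurpc.CriuFeatures{
			MemTrack:  proto.Bool(true),
			LazyPages: proto.Bool(true),
		},
	}
	return proto.Marshal(req)
}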

View file

@ -0,0 +1,209 @@
syntax = "proto2";
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
optional int32 pid = 3;
optional int32 fd = 4;
}
message criu_veth_pair {
required string if_in = 1;
required string if_out = 2;
};
message ext_mount_map {
required string key = 1;
required string val = 2;
};
message join_namespace {
required string ns = 1;
required string ns_file = 2;
optional string extra_opt = 3;
}
message inherit_fd {
required string key = 1;
required int32 fd = 2;
};
message cgroup_root {
optional string ctrl = 1;
required string path = 2;
};
message unix_sk {
required uint32 inode = 1;
};
enum criu_cg_mode {
IGNORE = 0;
CG_NONE = 1;
PROPS = 2;
SOFT = 3;
FULL = 4;
STRICT = 5;
DEFAULT = 6;
};
message criu_opts {
required int32 images_dir_fd = 1;
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
optional bool leave_running = 3;
optional bool ext_unix_sk = 4;
optional bool tcp_established = 5;
optional bool evasive_devices = 6;
optional bool shell_job = 7;
optional bool file_locks = 8;
optional int32 log_level = 9 [default = 2];
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
optional criu_page_server_info ps = 11;
optional bool notify_scripts = 12;
optional string root = 13;
optional string parent_img = 14;
optional bool track_mem = 15;
optional bool auto_dedup = 16;
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
optional bool manage_cgroups = 24; /* backward compatibility */
repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
repeated inherit_fd inherit_fd = 27; /* swrk only */
optional bool auto_ext_mnt = 28;
optional bool ext_sharing = 29;
optional bool ext_masters = 30;
repeated string skip_mnt = 31;
repeated string enable_fs = 32;
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
optional criu_cg_mode manage_cgroups_mode = 34;
optional uint32 ghost_limit = 35 [default = 0x100000];
repeated string irmap_scan_paths = 36;
repeated string external = 37;
optional uint32 empty_ns = 38;
repeated join_namespace join_ns = 39;
optional string cgroup_props = 41;
optional string cgroup_props_file = 42;
repeated string cgroup_dump_controller = 43;
optional string freeze_cgroup = 44;
optional uint32 timeout = 45;
optional bool tcp_skip_in_flight = 46;
optional bool weak_sysctls = 47;
optional bool lazy_pages = 48;
optional int32 status_fd = 49;
optional bool orphan_pts_master = 50;
}
message criu_dump_resp {
optional bool restored = 1;
}
message criu_restore_resp {
required int32 pid = 1;
}
message criu_notify {
optional string script = 1;
optional int32 pid = 2;
}
enum criu_req_type {
EMPTY = 0;
DUMP = 1;
RESTORE = 2;
CHECK = 3;
PRE_DUMP = 4;
PAGE_SERVER = 5;
NOTIFY = 6;
CPUINFO_DUMP = 7;
CPUINFO_CHECK = 8;
FEATURE_CHECK = 9;
VERSION = 10;
}
/*
* List of features which can be queried via
* CRIU_REQ_TYPE__FEATURE_CHECK
*/
message criu_features {
optional bool mem_track = 1;
optional bool lazy_pages = 2;
}
/*
* Request -- each type corresponds to must-be-there
* request arguments of respective type
*/
message criu_req {
required criu_req_type type = 1;
optional criu_opts opts = 2;
optional bool notify_success = 3;
/*
* When set, the service won't close the connection but
* will wait for more requests to appear. This does not work
* for all request types.
*/
optional bool keep_open = 4;
/*
* 'features' can be used to query which features
* are supported by the installed criu/kernel
* via RPC.
*/
optional criu_features features = 5;
}
/*
* Response -- it states whether the request was served
* and additional request-specific information
*/
message criu_resp {
required criu_req_type type = 1;
required bool success = 2;
optional criu_dump_resp dump = 3;
optional criu_restore_resp restore = 4;
optional criu_notify notify = 5;
optional criu_page_server_info ps = 6;
optional int32 cr_errno = 7;
optional criu_features features = 8;
optional string cr_errmsg = 9;
optional criu_version version = 10;
}
/* Answer for criu_req_type.VERSION requests */
message criu_version {
required int32 major = 1;
required int32 minor = 2;
optional string gitid = 3;
optional int32 sublevel = 4;
optional int32 extra = 5;
optional string name = 6;
}

View file

@ -0,0 +1,104 @@
package devices
import (
"errors"
"io/ioutil"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
var (
ErrNotADevice = errors.New("not a device node")
)
// Testing dependencies
var (
unixLstat = unix.Lstat
ioutilReadDir = ioutil.ReadDir
)
// DeviceFromPath, given the path to a device and its cgroup permissions (which cannot be easily queried), looks up the information about that Linux device and returns it as a configs.Device struct.
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
var stat unix.Stat_t
err := unixLstat(path, &stat)
if err != nil {
return nil, err
}
var (
devNumber = stat.Rdev
major = unix.Major(devNumber)
)
if major == 0 {
return nil, ErrNotADevice
}
var (
devType rune
mode = stat.Mode
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = 'b'
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = 'c'
}
return &configs.Device{
Type: devType,
Path: path,
Major: int64(major),
Minor: int64(unix.Minor(devNumber)),
Permissions: permissions,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
}, nil
}
func HostDevices() ([]*configs.Device, error) {
return getDevices("/dev")
}
func getDevices(path string) ([]*configs.Device, error) {
files, err := ioutilReadDir(path)
if err != nil {
return nil, err
}
out := []*configs.Device{}
for _, f := range files {
switch {
case f.IsDir():
switch f.Name() {
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
out = append(out, sub...)
continue
}
case f.Name() == "console":
continue
}
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
if err != nil {
if err == ErrNotADevice {
continue
}
if os.IsNotExist(err) {
continue
}
return nil, err
}
out = append(out, device)
}
return out, nil
}
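For readers unfamiliar with this package, the short program below is a hedged usage sketch of DeviceFromPath and HostDevices; the device path and output format are purely illustrative and are not part of the vendored code.
// Hypothetical sketch (a separate program, not part of this package).
package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/devices"
)

func main() {
	// Describe a single device node with read/write/mknod cgroup permissions.
	device, err := devices.DeviceFromPath("/dev/null", "rwm")
	if err != nil {
		// A path that is not a device node surfaces devices.ErrNotADevice here.
		fmt.Println("lookup failed:", err)
		return
	}
	fmt.Printf("type=%c major=%d minor=%d\n", device.Type, device.Major, device.Minor)

	// Walk /dev recursively and list every host device found.
	all, err := devices.HostDevices()
	if err != nil {
		fmt.Println("walk failed:", err)
		return
	}
	fmt.Println("host devices:", len(all))
}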

View file

@ -0,0 +1,63 @@
package devices
import (
"errors"
"os"
"testing"
"golang.org/x/sys/unix"
)
func TestDeviceFromPathLstatFailure(t *testing.T) {
testError := errors.New("test error")
// Override unix.Lstat to inject error.
unixLstat = func(path string, stat *unix.Stat_t) error {
return testError
}
_, err := DeviceFromPath("", "")
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}
func TestHostDevicesIoutilReadDirFailure(t *testing.T) {
testError := errors.New("test error")
// Override ioutil.ReadDir to inject error.
ioutilReadDir = func(dirname string) ([]os.FileInfo, error) {
return nil, testError
}
_, err := HostDevices()
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}
func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) {
testError := errors.New("test error")
called := false
// Override ioutil.ReadDir to inject error after the first call.
ioutilReadDir = func(dirname string) ([]os.FileInfo, error) {
if called {
return nil, testError
}
called = true
// Provoke a second call.
fi, err := os.Lstat("/tmp")
if err != nil {
t.Fatalf("Unexpected error %v", err)
}
return []os.FileInfo{fi}, nil
}
_, err := HostDevices()
if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError)
}
}

View file

@ -60,9 +60,9 @@ func (c ErrorCode) String() string {
type Error interface {
error
// Returns a verbose string including the error message
// and a representation of the stack trace suitable for
// printing.
// Returns an error if it failed to write the detail of the Error to w.
// The detail of the Error may include the error message and a
// representation of the stack trace.
Detail(w io.Writer) error
// Returns the error code for this error.

View file

@ -0,0 +1,25 @@
package libcontainer
import "testing"
func TestErrorCode(t *testing.T) {
codes := map[ErrorCode]string{
IdInUse: "Id already in use",
InvalidIdFormat: "Invalid format",
ContainerPaused: "Container paused",
ConfigInvalid: "Invalid configuration",
SystemError: "System error",
ContainerNotExists: "Container does not exist",
ContainerNotStopped: "Container is not stopped",
ContainerNotRunning: "Container is not running",
ConsoleExists: "Console exists for process",
ContainerNotPaused: "Container is not paused",
NoProcessOps: "No process operations",
}
for code, expected := range codes {
if actual := code.String(); actual != expected {
t.Fatalf("expected string %q but received %q", expected, actual)
}
}
}

View file

@ -10,7 +10,7 @@ type Factory interface {
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and file system) must have distinct ids.
// a factory with the same path (and filesystem) must have distinct ids.
//
// Returns the new container with a running process.
//
@ -28,7 +28,6 @@ type Factory interface {
//
// errors:
// Path does not exist
// Container is stopped
// System error
Load(id string) (Container, error)

View file

@ -6,58 +6,42 @@ import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime/debug"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
const (
stateFilename = "state.json"
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
)
var (
idRegex = regexp.MustCompile(`^[\w-\.]+$`)
maxIdLen = 1024
)
var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init arguments.
// provided init binary path and arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
name := args[0]
if filepath.Base(name) == name {
if lp, err := exec.LookPath(name); err == nil {
name = lp
return func(l *LinuxFactory) (err error) {
if len(args) > 0 {
// Resolve relative paths to ensure that it's available
// after directory changes.
if args[0], err = filepath.Abs(args[0]); err != nil {
return newGenericError(err, ConfigInvalid)
}
} else {
abs, err := filepath.Abs(name)
if err != nil {
return err
}
name = abs
}
l.InitPath = "/proc/self/exe"
l.InitArgs = append([]string{name}, args[1:]...)
return nil
}
}
// InitPath returns an options func to configure a LinuxFactory with the
// provided absolute path to the init binary and arguments.
func InitPath(path string, args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.InitPath = path
l.InitArgs = args
return nil
}
@ -88,6 +72,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}
// IntelRdtFs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
func IntelRdtFs(l *LinuxFactory) error {
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
return &intelrdt.IntelRdtManager{
Config: config,
Id: id,
Path: path,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
@ -95,13 +93,22 @@ func TmpfsRoot(l *LinuxFactory) error {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// CriuPath returns an option func to configure a LinuxFactory with the
// provided path to the criu binary.
func CriuPath(criupath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.CriuPath = criupath
return nil
}
}
// New returns a Linux-based container factory that stores its state in the given
// root directory and configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
@ -112,12 +119,16 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
}
l := &LinuxFactory{
Root: root,
InitPath: "/proc/self/exe",
InitArgs: []string{os.Args[0], "init"},
Validator: validate.New(),
CriuPath: "criu",
}
InitArgs(os.Args[0], "init")(l)
Cgroupfs(l)
for _, opt := range options {
if opt == nil {
continue
}
if err := opt(l); err != nil {
return nil, err
}
@ -130,7 +141,8 @@ type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitPath is the absolute path to the init binary.
// InitPath is the path to the binary that carries out the init responsibilities
// when spawning a container.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning
@ -141,11 +153,19 @@ type LinuxFactory struct {
// containers.
CriuPath string
// New{u,g}idmapPath is the path to the newuidmap/newgidmap binaries used for
// id mapping with rootless containers.
NewuidmapPath string
NewgidmapPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@ -164,7 +184,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0700); err != nil {
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError)
}
c := &linuxContainer{
@ -174,8 +197,13 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
return c, nil
}
@ -185,7 +213,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot)
state, err := l.loadState(containerRoot, id)
if err != nil {
return nil, err
}
@ -195,20 +223,26 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
fds: state.ExternalDescriptors,
}
c := &linuxContainer{
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &createdState{c: c, s: Created}
c.state = &loadedState{c: c}
if err := c.refreshState(); err != nil {
return nil, err
}
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
}
return c, nil
}
@ -219,55 +253,79 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state.
// This is a low-level implementation detail of the reexec and should not be consumed externally.
func (l *LinuxFactory) StartInitialization() (err error) {
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
pipefd, err := strconv.Atoi(fdStr)
var (
pipefd, fifofd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
// Get the INITPIPE.
pipefd, err = strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
defer pipe.Close()
// Only init processes have FIFOFD.
fifofd = -1
if it == initStandard {
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
}
}
if envConsole != "" {
console, err := strconv.Atoi(envConsole)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
}
consoleSocket = os.NewFile(uintptr(console), "console-socket")
defer consoleSocket.Close()
}
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
var i initer
defer func() {
// If we have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
// If the container's init succeeds, syscall.Exec will not return, hence
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
panic(err)
}
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
// ensure that this pipe is always closed
pipe.Close()
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err = newContainerInit(it, pipe)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
if err != nil {
return err
}
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
return i.Init()
}
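For context on the _LIBCONTAINER_INITPIPE handling above, the following is a rough, self-contained sketch (not runc's actual bootstrap code) of how a parent can hand a pipe to a re-exec'd child through ExtraFiles and tell it which fd number it landed on via an environment variable. The /proc/self/exe re-exec and the hard-coded fd number 3 are simplifying assumptions for illustration only.

package main

import (
	"fmt"
	"os"
	"os/exec"
	"strconv"
)

func main() {
	// If the env var is set, we are the re-exec'd "child".
	if os.Getenv("_LIBCONTAINER_INITPIPE") != "" {
		childMain()
		return
	}

	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}
	cmd := exec.Command("/proc/self/exe")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	// ExtraFiles[0] becomes fd 3 in the child, ExtraFiles[1] fd 4, and so on
	// (fds 0-2 are stdio), which is why the env var carries the number 3 here.
	cmd.ExtraFiles = []*os.File{r}
	cmd.Env = append(os.Environ(), "_LIBCONTAINER_INITPIPE="+strconv.Itoa(3))
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	fmt.Fprintln(w, "hello from parent")
	w.Close()
	cmd.Wait()
}

func childMain() {
	fd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_INITPIPE"))
	if err != nil {
		panic(err)
	}
	pipe := os.NewFile(uintptr(fd), "pipe")
	defer pipe.Close()
	buf := make([]byte, 64)
	n, _ := pipe.Read(buf)
	fmt.Println("child received:", string(buf[:n]))
}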
func (l *LinuxFactory) loadState(root string) (*State, error) {
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(err, ContainerNotExists)
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
@ -283,8 +341,24 @@ func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
return nil
}
// NewuidmapPath returns an option func to configure a LinuxFactory with the
// provided newuidmapPath.
func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewuidmapPath = newuidmapPath
return nil
}
}
// NewgidmapPath returns an option func to configure a LinuxFactory with the
// provided newgidmapPath.
func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewgidmapPath = newgidmapPath
return nil
}
}
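As a usage note, the two option funcs above follow the same functional-options pattern as Cgroupfs and IntelRdtFs seen elsewhere in this diff. A minimal sketch of wiring them together is shown below; the root path and the newuidmap/newgidmap locations are placeholder values.

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer"
)

func main() {
	// Each option func mutates the LinuxFactory before New returns it.
	factory, err := libcontainer.New(
		"/run/runc",
		libcontainer.Cgroupfs,
		libcontainer.NewuidmapPath("/usr/bin/newuidmap"),
		libcontainer.NewgidmapPath("/usr/bin/newgidmap"),
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(factory.Type()) // "libcontainer"
}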


@ -0,0 +1,234 @@
// +build linux
package libcontainer
import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
return dir, nil
}
func TestFactoryNew(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewIntelRdt(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewTmpfs(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, TmpfsRoot)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
mounted, err := mount.Mounted(lfactory.Root)
if err != nil {
t.Fatal(err)
}
if !mounted {
t.Fatalf("Factory Root is not mounted")
}
mounts, err := mount.GetMounts()
if err != nil {
t.Fatal(err)
}
var found bool
for _, m := range mounts {
if m.Mountpoint == lfactory.Root {
if m.Fstype != "tmpfs" {
t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs")
}
if m.Source != "tmpfs" {
t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs")
}
found = true
}
}
if !found {
t.Fatalf("Factory Root is not listed in mounts list")
}
defer unix.Unmount(root, unix.MNT_DETACH)
}
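The tmpfs test above relies on mount.Mounted and mount.GetMounts. A rough sketch of the underlying idea behind such a check (comparing device numbers with the parent directory) is shown below; this is only an approximation, since the real mount package parses /proc/self/mountinfo and therefore also catches bind mounts on the same filesystem.

package main

import (
	"fmt"
	"path/filepath"

	"golang.org/x/sys/unix"
)

// mounted reports whether path is a mount point by checking whether it lives
// on a different device than its parent directory.
func mounted(path string) (bool, error) {
	var st, parentSt unix.Stat_t
	if err := unix.Stat(path, &st); err != nil {
		return false, err
	}
	if err := unix.Stat(filepath.Dir(path), &parentSt); err != nil {
		return false, err
	}
	return st.Dev != parentSt.Dev, nil
}

func main() {
	ok, err := mounted("/proc")
	if err != nil {
		panic(err)
	}
	fmt.Println("/proc is a mount point:", ok)
}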
func TestFactoryLoadNotExists(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
_, err = factory.Load("nocontainer")
if err == nil {
t.Fatal("expected nil error loading non-existing container")
}
lerr, ok := err.(Error)
if !ok {
t.Fatal("expected libcontainer error type")
}
if lerr.Code() != ContainerNotExists {
t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code())
}
}
func TestFactoryLoadContainer(t *testing.T) {
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
// setup default container config and state for mocking
var (
id = "1"
expectedHooks = &configs.Hooks{
Prestart: []configs.Hook{
configs.CommandHook{Command: configs.Command{Path: "prestart-hook"}},
},
Poststart: []configs.Hook{
configs.CommandHook{Command: configs.Command{Path: "poststart-hook"}},
},
Poststop: []configs.Hook{
unserializableHook{},
configs.CommandHook{Command: configs.Command{Path: "poststop-hook"}},
},
}
expectedConfig = &configs.Config{
Rootfs: "/mycontainer/root",
Hooks: expectedHooks,
}
expectedState = &State{
BaseState: BaseState{
InitProcessPid: 1024,
Config: *expectedConfig,
},
}
)
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
t.Fatal(err)
}
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Load(id)
if err != nil {
t.Fatal(err)
}
if container.ID() != id {
t.Fatalf("expected container id %q but received %q", id, container.ID())
}
config := container.Config()
if config.Rootfs != expectedConfig.Rootfs {
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
}
expectedHooks.Poststop = expectedHooks.Poststop[1:] // expect unserializable hook to be skipped
if !reflect.DeepEqual(config.Hooks, expectedHooks) {
t.Fatalf("expects hooks %q but received %q", expectedHooks, config.Hooks)
}
lcontainer, ok := container.(*linuxContainer)
if !ok {
t.Fatal("expected linux container on linux based systems")
}
if lcontainer.initProcess.pid() != expectedState.InitProcessPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid())
}
}
func marshal(path string, v interface{}) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return utils.WriteJSON(f, v)
}
type unserializableHook struct{}
func (unserializableHook) Run(configs.HookState) error {
return nil
}


@ -1,6 +1,7 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
@ -8,20 +9,6 @@ import (
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
type syncType uint8
const (
procReady syncType = iota
procError
procRun
procHooks
procResume
)
type syncT struct {
Type syncType `json:"type"`
}
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
@ -51,14 +38,27 @@ func newGenericError(err error, c ErrorCode) Error {
}
func newSystemError(err error) Error {
if le, ok := err.(Error); ok {
return le
}
return createSystemError(err, "")
}
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
return createSystemError(err, fmt.Sprintf(cause, v...))
}
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Stack: stacktrace.Capture(1),
Cause: cause,
Stack: stacktrace.Capture(2),
}
if err != nil {
gerr.Message = err.Error()
@ -70,12 +70,17 @@ type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return e.Message
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
}
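The Stack: stacktrace.Capture(2) change above exists so the recorded frame is the caller of newSystemError*/newGenericError rather than createSystemError itself. Below is a small, self-contained sketch of that skip-count idea using runtime.Caller; the names caller and newError are illustrative and not the stacktrace package's API.

package main

import (
	"fmt"
	"runtime"
)

// caller reports the file:line of a frame above us on the stack. The +1
// accounts for caller itself, mirroring how createSystemError passes a larger
// skip count so the recorded frame belongs to its caller's caller.
func caller(skip int) string {
	_, file, line, ok := runtime.Caller(skip + 1)
	if !ok {
		return "unknown"
	}
	return fmt.Sprintf("%s:%d", file, line)
}

// newError tags an error with the location of whoever called newError,
// not newError itself (skip=1).
func newError(msg string) error {
	return fmt.Errorf("%s: %s", caller(1), msg)
}

func main() {
	// Prints something like "/tmp/main.go:27: something failed".
	fmt.Println(newError("something failed"))
}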
func (e *genericError) Code() ErrorCode {


@ -0,0 +1,49 @@
package libcontainer
import (
"fmt"
"io/ioutil"
"testing"
)
func TestErrorDetail(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if derr := err.Detail(ioutil.Discard); derr != nil {
t.Fatal(derr)
}
}
func TestErrorWithCode(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if code := err.Code(); code != SystemError {
t.Fatalf("expected err code %q but %q", SystemError, code)
}
}
func TestErrorWithError(t *testing.T) {
cc := []struct {
errmsg string
cause string
}{
{
errmsg: "test error",
},
{
errmsg: "test error",
cause: "test",
},
}
for _, v := range cc {
err := newSystemErrorWithCause(fmt.Errorf(v.errmsg), v.cause)
msg := err.Error()
if v.cause == "" && msg != v.errmsg {
t.Fatalf("expected err(%q) equal errmsg(%q)", msg, v.errmsg)
}
if v.cause != "" && msg == v.errmsg {
t.Fatalf("unexpected err(%q) equal errmsg(%q)", msg, v.errmsg)
}
}
}


@ -6,19 +6,21 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
"syscall"
"syscall" // only for Errno
"unsafe"
"github.com/Sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
@ -30,7 +32,8 @@ const (
)
type pid struct {
Pid int `json:"pid"`
Pid int `json:"pid"`
PidFirstChild int `json:"pid_first"`
}
// network is an internal struct used to setup container networks.
@ -44,27 +47,31 @@ type network struct {
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities *configs.Capabilities `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
ConsoleWidth uint16 `json:"console_width"`
ConsoleHeight uint16 `json:"console_height"`
Rootless bool `json:"rootless"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
@ -75,13 +82,17 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
switch t {
case initSetns:
return &linuxSetnsInit{
config: config,
pipe: pipe,
consoleSocket: consoleSocket,
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
pipe: pipe,
consoleSocket: consoleSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
@ -113,16 +124,18 @@ func finalizeNamespace(config *initConfig) error {
return err
}
capabilities := config.Config.Capabilities
capabilities := &configs.Capabilities{}
if config.Capabilities != nil {
capabilities = config.Capabilities
} else if config.Config.Capabilities != nil {
capabilities = config.Config.Capabilities
}
w, err := newCapWhitelist(capabilities)
w, err := newContainerCapList(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
if err := w.ApplyBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
@ -135,36 +148,79 @@ func finalizeNamespace(config *initConfig) error {
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
if err := w.ApplyCaps(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return err
if err := unix.Chdir(config.Cwd); err != nil {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
}
}
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
defer socket.Close()
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but the /dev/pts mount
// can have gid=X set (at the user's option), so touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
pty, slavePath, err := console.NewPty()
if err != nil {
return err
}
if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
err = pty.Resize(console.WinSize{
Height: config.ConsoleHeight,
Width: config.ConsoleWidth,
})
if err != nil {
return err
}
}
// After we return from here, we don't need the console anymore.
defer pty.Close()
// Mount the console inside our rootfs.
if mount {
if err := mountConsole(slavePath); err != nil {
return err
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
return err
}
// Now, dup over all the things.
return dupStdio(slavePath)
}
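setupConsole hands the pty master back to the parent with utils.SendFd over the console socket. Below is a self-contained sketch of that fd-passing mechanism (an SCM_RIGHTS control message over a unix socketpair); sendFd and recvFd are illustrative stand-ins for runc's helpers, and /dev/null stands in for the pty master.

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// sendFd passes an open file descriptor over a connected unix socket using an
// SCM_RIGHTS control message, with the file name as the regular payload.
func sendFd(socket int, name string, fd uintptr) error {
	oob := unix.UnixRights(int(fd))
	return unix.Sendmsg(socket, []byte(name), oob, nil, 0)
}

// recvFd reads one SCM_RIGHTS control message and returns the transferred
// descriptor wrapped in an *os.File named after the payload.
func recvFd(socket int) (*os.File, error) {
	buf := make([]byte, 256)
	oob := make([]byte, unix.CmsgSpace(4)) // room for a single int32 fd
	n, oobn, _, _, err := unix.Recvmsg(socket, buf, oob, 0)
	if err != nil {
		return nil, err
	}
	msgs, err := unix.ParseSocketControlMessage(oob[:oobn])
	if err != nil {
		return nil, err
	}
	if len(msgs) != 1 {
		return nil, fmt.Errorf("expected 1 control message, got %d", len(msgs))
	}
	fds, err := unix.ParseUnixRights(&msgs[0])
	if err != nil {
		return nil, err
	}
	if len(fds) != 1 {
		return nil, fmt.Errorf("expected 1 fd, got %d", len(fds))
	}
	return os.NewFile(uintptr(fds[0]), string(buf[:n])), nil
}

func main() {
	pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	f, err := os.Open("/dev/null") // stands in for the pty master
	if err != nil {
		panic(err)
	}
	if err := sendFd(pair[0], f.Name(), f.Fd()); err != nil {
		panic(err)
	}
	got, err := recvFd(pair[1])
	if err != nil {
		panic(err)
	}
	fmt.Println("received fd", got.Fd(), "named", got.Name())
}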
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procRun {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
@ -173,19 +229,15 @@ func syncParentReady(pipe io.ReadWriter) error {
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procResume {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
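syncParentReady and syncParentHooks now go through writeSync/readSync helpers defined elsewhere in this diff. Below is a minimal sketch of such a two-phase JSON handshake over in-memory pipes; the syncT wire format is taken from the constants removed from generic_error.go above, while writeSync/readSync here are approximations of the real helpers.

package main

import (
	"encoding/json"
	"fmt"
	"io"
)

type syncType uint8

const (
	procReady syncType = iota
	procError
	procRun
	procHooks
	procResume
)

type syncT struct {
	Type syncType `json:"type"`
}

func writeSync(pipe io.Writer, sync syncType) error {
	return json.NewEncoder(pipe).Encode(syncT{sync})
}

func readSync(pipe io.Reader, expected syncType) error {
	var procSync syncT
	if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
		if err == io.EOF {
			return fmt.Errorf("parent closed synchronisation channel")
		}
		return err
	}
	if procSync.Type != expected {
		return fmt.Errorf("invalid synchronisation flag from parent: %v", procSync.Type)
	}
	return nil
}

func main() {
	// Two unidirectional pipes stand in for the parent<->child init pipe.
	childToParentR, childToParentW := io.Pipe()
	parentToChildR, parentToChildW := io.Pipe()
	done := make(chan struct{})

	go func() {
		defer close(done)
		// "Child" side: announce readiness, then wait for the go-ahead.
		if err := writeSync(childToParentW, procReady); err != nil {
			panic(err)
		}
		if err := readSync(parentToChildR, procRun); err != nil {
			panic(err)
		}
		fmt.Println("child: cleared to exec")
	}()

	// "Parent" side: wait for readiness, then give the all-clear.
	if err := readSync(childToParentR, procReady); err != nil {
		panic(err)
	}
	if err := writeSync(parentToChildW, procRun); err != nil {
		panic(err)
	}
	<-done
}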
@ -193,40 +245,69 @@ func syncParentHooks(pipe io.ReadWriter) error {
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
/* var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}*/
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
// Rather than just erroring out later in setuid(2) and setgid(2), check
// that the user is mapped here.
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
return fmt.Errorf("cannot set uid to unmapped user in user namespace")
}
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
}
if config.Rootless {
// We cannot set any additional groups in a rootless container and thus
// we bail if the user asked us to do so. TODO: We currently can't do
// this check earlier, but if libcontainer.Process.User was typesafe
// this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// Before we change to the container's user make sure that the processes
// STDIO is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(config, execUser); err != nil {
return err
}
/*
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}*/
if err := unix.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
@ -234,6 +315,7 @@ func setupUser(config *initConfig) error {
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
@ -246,9 +328,9 @@ func setupUser(config *initConfig) error {
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil {
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
@ -256,15 +338,32 @@ func fixStdioPermissions(u *user.ExecUser) error {
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s syscall.Stat_t
if err := syscall.Fstat(int(fd), &s); err != nil {
var s unix.Stat_t
if err := unix.Fstat(int(fd), &s); err != nil {
return err
}
// skip chown of /dev/null if it was used as one of the STDIO fds.
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
// If we've hit an EINVAL then s.Gid isn't mapped in the user
// namespace. If we've hit an EPERM then the inode's current owner
// is not mapped in our user namespace (in particular,
// privileged_wrt_inode_uidgid() has failed). In either case, we
// are in a configuration where it's better for us to just not
// touch the stdio rather than bail at this point.
if err == unix.EINVAL || err == unix.EPERM {
continue
}
return err
}
}
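The mapped-user check added to setupUser above (config.Config.HostUID / HostGID) boils down to translating a container uid or gid through the user namespace's uid_map/gid_map. Below is a self-contained sketch of that translation, assuming the standard three-column /proc/<pid>/uid_map format; hostID is an illustrative name, not libcontainer's API.

package main

import (
	"bufio"
	"fmt"
	"os"
)

// hostID translates a container ID to the corresponding host ID using a
// uid_map/gid_map file, or reports that the ID is unmapped. Each line of the
// map file is: <ID-inside-ns> <ID-outside-ns> <length>.
func hostID(mapPath string, containerID int) (int, error) {
	f, err := os.Open(mapPath)
	if err != nil {
		return -1, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	for s.Scan() {
		var inside, outside, size int
		if _, err := fmt.Sscanf(s.Text(), "%d %d %d", &inside, &outside, &size); err != nil {
			continue
		}
		if containerID >= inside && containerID < inside+size {
			return outside + (containerID - inside), nil
		}
	}
	if err := s.Err(); err != nil {
		return -1, err
	}
	return -1, fmt.Errorf("ID %d is not mapped", containerID)
}

func main() {
	uid, err := hostID("/proc/self/uid_map", 0)
	if err != nil {
		fmt.Println("uid 0 is unmapped:", err)
		return
	}
	fmt.Println("container uid 0 maps to host uid", uid)
}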
@ -319,23 +418,58 @@ func setupRoute(config *configs.Config) error {
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
func setOomScoreAdj(oomScoreAdj int, pid int) error {
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
const _P_PID = 1
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}
// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
// isWaitable returns true if the process has exited, false otherwise.
// It is based on blockUntilWaitable in src/os/wait_waitid.go.
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, os.NewSyscallError("waitid", e)
}
return si.si_pid != 0, nil
}
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD), false otherwise.
func isNoChildren(err error) bool {
switch err := err.(type) {
case syscall.Errno:
if err == unix.ECHILD {
return true
}
case *os.SyscallError:
if err.Err == unix.ECHILD {
return true
}
}
return false
}
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
@ -352,16 +486,48 @@ func killCgroupProcesses(m cgroups.Manager) error {
continue
}
procs = append(procs, p)
if err := p.Kill(); err != nil {
if err := p.Signal(s); err != nil {
logrus.Warn(err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
subreaper, err := system.GetSubreaper()
if err != nil {
// An error here means that PR_GET_CHILD_SUBREAPER is not
// supported, most likely because this code is running on a
// kernel older than 3.4. We don't want to throw an error in
// that case, so we simplify things and assume no subreaper
// is set.
subreaper = 0
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !isNoChildren(err) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}
// In case a subreaper has been setup, this code must not
// wait for the process. Otherwise, we cannot be sure the
// current process will be reaped by the subreaper, while
// the subreaper might be waiting for this process in order
// to retrieve its exit code.
if subreaper == 0 {
if _, err := p.Wait(); err != nil {
if !isNoChildren(err) {
logrus.Warn("wait: ", err)
}
}
}
}
return nil
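system.GetSubreaper, referenced above, is essentially a prctl(PR_GET_CHILD_SUBREAPER) query. Below is a rough, self-contained sketch of such a helper; getSubreaper is an illustrative name and not necessarily the system package's exact implementation.

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

// getSubreaper queries the child-subreaper flag of the calling process.
// PR_GET_CHILD_SUBREAPER stores the flag into the location pointed to by arg2.
func getSubreaper() (int, error) {
	var i uintptr
	if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
		return -1, err
	}
	return int(i), nil
}

func main() {
	v, err := getSubreaper()
	if err != nil {
		// Kernels older than 3.4 do not support this prctl option.
		fmt.Println("PR_GET_CHILD_SUBREAPER not supported:", err)
		return
	}
	fmt.Println("child subreaper flag:", v)
}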
