test: Bump up runc to 9f9c96235cc97674e935002fc3d78361b696a69e

This brings in a fix for a cgroups setup race condition
that we hit sometimes in the tests.

Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
This commit is contained in:
Mrunal Patel 2018-01-19 13:37:10 -08:00
parent 77561e95cf
commit 970b8d61a7
89 changed files with 3197 additions and 970 deletions

View file

@ -57,7 +57,7 @@ RUN mkdir -p /usr/src/criu \
&& rm -rf /usr/src/criu && rm -rf /usr/src/criu
# Install runc # Install runc
ENV RUNC_COMMIT c6e4a1ebeb1a72b529c6f1b6ee2b1ae5b868b14f ENV RUNC_COMMIT 9f9c96235cc97674e935002fc3d78361b696a69e
RUN set -x \ RUN set -x \
&& export GOPATH="$(mktemp -d)" \ && export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ && git clone https://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \

View file

@ -4,7 +4,7 @@
git: git:
repo: "https://github.com/opencontainers/runc.git" repo: "https://github.com/opencontainers/runc.git"
dest: "{{ ansible_env.GOPATH }}/src/github.com/opencontainers/runc" dest: "{{ ansible_env.GOPATH }}/src/github.com/opencontainers/runc"
version: "c6e4a1ebeb1a72b529c6f1b6ee2b1ae5b868b14f" version: "9f9c96235cc97674e935002fc3d78361b696a69e"
- name: build runc - name: build runc
make: make:

View file

@ -21,7 +21,7 @@ google.golang.org/grpc v1.0.4 https://github.com/grpc/grpc-go
github.com/opencontainers/selinux b29023b86e4a69d1b46b7e7b4e2b6fda03f0b9cd github.com/opencontainers/selinux b29023b86e4a69d1b46b7e7b4e2b6fda03f0b9cd
github.com/opencontainers/go-digest v1.0.0-rc0 github.com/opencontainers/go-digest v1.0.0-rc0
github.com/opencontainers/runtime-tools 625e2322645b151a7cbb93a8b42920933e72167f github.com/opencontainers/runtime-tools 625e2322645b151a7cbb93a8b42920933e72167f
github.com/opencontainers/runc 45bde006ca8c90e089894508708bcf0e2cdf9e13 github.com/opencontainers/runc 9f9c96235cc97674e935002fc3d78361b696a69e
github.com/mrunalp/fileutils master github.com/mrunalp/fileutils master
github.com/vishvananda/netlink master github.com/vishvananda/netlink master
github.com/vishvananda/netns master github.com/vishvananda/netns master
@ -64,7 +64,7 @@ github.com/ugorji/go d23841a297e5489e787e72fceffabf9d2994b52a
github.com/spf13/pflag 9ff6c6923cfffbcd502984b8e0c80539a94968b7 github.com/spf13/pflag 9ff6c6923cfffbcd502984b8e0c80539a94968b7
golang.org/x/crypto 3fbbcd23f1cb824e69491a5930cfeff09b12f4d2 golang.org/x/crypto 3fbbcd23f1cb824e69491a5930cfeff09b12f4d2
golang.org/x/net c427ad74c6d7a814201695e9ffde0c5d400a7674 golang.org/x/net c427ad74c6d7a814201695e9ffde0c5d400a7674
golang.org/x/sys 9aade4d3a3b7e6d876cd3823ad20ec45fc035402 golang.org/x/sys 7ddbeae9ae08c6a06a59597f0c9edbc5ff2444ce
golang.org/x/text f72d8390a633d5dfb0cc84043294db9f6c935756 golang.org/x/text f72d8390a633d5dfb0cc84043294db9f6c935756
github.com/kr/pty v1.0.0 github.com/kr/pty v1.0.0
github.com/google/btree 7d79101e329e5a3adf994758c578dab82b90c017 github.com/google/btree 7d79101e329e5a3adf994758c578dab82b90c017
@ -116,3 +116,5 @@ github.com/pmezard/go-difflib v1.0.0
github.com/xeipuuv/gojsonreference master github.com/xeipuuv/gojsonreference master
github.com/xeipuuv/gojsonschema master github.com/xeipuuv/gojsonschema master
github.com/xeipuuv/gojsonpointer master github.com/xeipuuv/gojsonpointer master
github.com/containerd/console 84eeaae905fa414d03e07bcd6c8d3f19e7cf180e
github.com/cyphar/filepath-securejoin v0.2.1

201
vendor/github.com/containerd/console/LICENSE generated vendored Normal file
View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

17
vendor/github.com/containerd/console/README.md generated vendored Normal file
View file

@ -0,0 +1,17 @@
# console
[![Build Status](https://travis-ci.org/containerd/console.svg?branch=master)](https://travis-ci.org/containerd/console)
Golang package for dealing with consoles. Light on deps and a simple API.
## Modifying the current process
```go
current := console.Current()
defer current.Reset()
if err := current.SetRaw(); err != nil {
}
ws, err := current.Size()
current.Resize(ws)
```

62
vendor/github.com/containerd/console/console.go generated vendored Normal file
View file

@ -0,0 +1,62 @@
package console
import (
"errors"
"io"
"os"
)
var ErrNotAConsole = errors.New("provided file is not a console")
type Console interface {
io.Reader
io.Writer
io.Closer
// Resize resizes the console to the provided window size
Resize(WinSize) error
// ResizeFrom resizes the calling console to the size of the
// provided console
ResizeFrom(Console) error
// SetRaw sets the console in raw mode
SetRaw() error
// DisableEcho disables echo on the console
DisableEcho() error
// Reset restores the console to its orignal state
Reset() error
// Size returns the window size of the console
Size() (WinSize, error)
// Fd returns the console's file descriptor
Fd() uintptr
// Name returns the console's file name
Name() string
}
// WinSize specifies the window size of the console
type WinSize struct {
// Height of the console
Height uint16
// Width of the console
Width uint16
x uint16
y uint16
}
// Current returns the current processes console
func Current() Console {
c, err := ConsoleFromFile(os.Stdin)
if err != nil {
// stdin should always be a console for the design
// of this function
panic(err)
}
return c
}
// ConsoleFromFile returns a console using the provided file
func ConsoleFromFile(f *os.File) (Console, error) {
if err := checkConsole(f); err != nil {
return nil, err
}
return newMaster(f)
}

255
vendor/github.com/containerd/console/console_linux.go generated vendored Normal file
View file

@ -0,0 +1,255 @@
// +build linux
package console
import (
"io"
"os"
"sync"
"golang.org/x/sys/unix"
)
const (
maxEvents = 128
)
// Epoller manages multiple epoll consoles using edge-triggered epoll api so we
// dont have to deal with repeated wake-up of EPOLLER or EPOLLHUP.
// For more details, see:
// - https://github.com/systemd/systemd/pull/4262
// - https://github.com/moby/moby/issues/27202
//
// Example usage of Epoller and EpollConsole can be as follow:
//
// epoller, _ := NewEpoller()
// epollConsole, _ := epoller.Add(console)
// go epoller.Wait()
// var (
// b bytes.Buffer
// wg sync.WaitGroup
// )
// wg.Add(1)
// go func() {
// io.Copy(&b, epollConsole)
// wg.Done()
// }()
// // perform I/O on the console
// epollConsole.Shutdown(epoller.CloseConsole)
// wg.Wait()
// epollConsole.Close()
type Epoller struct {
efd int
mu sync.Mutex
fdMapping map[int]*EpollConsole
}
// NewEpoller returns an instance of epoller with a valid epoll fd.
func NewEpoller() (*Epoller, error) {
efd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
if err != nil {
return nil, err
}
return &Epoller{
efd: efd,
fdMapping: make(map[int]*EpollConsole),
}, nil
}
// Add creates a epoll console based on the provided console. The console will
// be registered with EPOLLET (i.e. using edge-triggered notification) and its
// file descriptor will be set to non-blocking mode. After this, user should use
// the return console to perform I/O.
func (e *Epoller) Add(console Console) (*EpollConsole, error) {
sysfd := int(console.Fd())
// Set sysfd to non-blocking mode
if err := unix.SetNonblock(sysfd, true); err != nil {
return nil, err
}
ev := unix.EpollEvent{
Events: unix.EPOLLIN | unix.EPOLLOUT | unix.EPOLLRDHUP | unix.EPOLLET,
Fd: int32(sysfd),
}
if err := unix.EpollCtl(e.efd, unix.EPOLL_CTL_ADD, sysfd, &ev); err != nil {
return nil, err
}
ef := &EpollConsole{
Console: console,
sysfd: sysfd,
readc: sync.NewCond(&sync.Mutex{}),
writec: sync.NewCond(&sync.Mutex{}),
}
e.mu.Lock()
e.fdMapping[sysfd] = ef
e.mu.Unlock()
return ef, nil
}
// Wait starts the loop to wait for its consoles' notifications and signal
// appropriate console that it can perform I/O.
func (e *Epoller) Wait() error {
events := make([]unix.EpollEvent, maxEvents)
for {
n, err := unix.EpollWait(e.efd, events, -1)
if err != nil {
// EINTR: The call was interrupted by a signal handler before either
// any of the requested events occurred or the timeout expired
if err == unix.EINTR {
continue
}
return err
}
for i := 0; i < n; i++ {
ev := &events[i]
// the console is ready to be read from
if ev.Events&(unix.EPOLLIN|unix.EPOLLHUP|unix.EPOLLERR) != 0 {
if epfile := e.getConsole(int(ev.Fd)); epfile != nil {
epfile.signalRead()
}
}
// the console is ready to be written to
if ev.Events&(unix.EPOLLOUT|unix.EPOLLHUP|unix.EPOLLERR) != 0 {
if epfile := e.getConsole(int(ev.Fd)); epfile != nil {
epfile.signalWrite()
}
}
}
}
}
// Close unregister the console's file descriptor from epoll interface
func (e *Epoller) CloseConsole(fd int) error {
e.mu.Lock()
defer e.mu.Unlock()
delete(e.fdMapping, fd)
return unix.EpollCtl(e.efd, unix.EPOLL_CTL_DEL, fd, &unix.EpollEvent{})
}
func (e *Epoller) getConsole(sysfd int) *EpollConsole {
e.mu.Lock()
f := e.fdMapping[sysfd]
e.mu.Unlock()
return f
}
// Close the epoll fd
func (e *Epoller) Close() error {
return unix.Close(e.efd)
}
// EpollConsole acts like a console but register its file descriptor with a
// epoll fd and uses epoll API to perform I/O.
type EpollConsole struct {
Console
readc *sync.Cond
writec *sync.Cond
sysfd int
closed bool
}
// Read reads up to len(p) bytes into p. It returns the number of bytes read
// (0 <= n <= len(p)) and any error encountered.
//
// If the console's read returns EAGAIN or EIO, we assumes that its a
// temporary error because the other side went away and wait for the signal
// generated by epoll event to continue.
func (ec *EpollConsole) Read(p []byte) (n int, err error) {
var read int
ec.readc.L.Lock()
defer ec.readc.L.Unlock()
for {
read, err = ec.Console.Read(p[n:])
n += read
if err != nil {
var hangup bool
if perr, ok := err.(*os.PathError); ok {
hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO)
} else {
hangup = (err == unix.EAGAIN || err == unix.EIO)
}
// if the other end disappear, assume this is temporary and wait for the
// signal to continue again. Unless we didnt read anything and the
// console is already marked as closed then we should exit
if hangup && !(n == 0 && len(p) > 0 && ec.closed) {
ec.readc.Wait()
continue
}
}
break
}
// if we didnt read anything then return io.EOF to end gracefully
if n == 0 && len(p) > 0 && err == nil {
err = io.EOF
}
// signal for others that we finished the read
ec.readc.Signal()
return n, err
}
// Writes len(p) bytes from p to the console. It returns the number of bytes
// written from p (0 <= n <= len(p)) and any error encountered that caused
// the write to stop early.
//
// If writes to the console returns EAGAIN or EIO, we assumes that its a
// temporary error because the other side went away and wait for the signal
// generated by epoll event to continue.
func (ec *EpollConsole) Write(p []byte) (n int, err error) {
var written int
ec.writec.L.Lock()
defer ec.writec.L.Unlock()
for {
written, err = ec.Console.Write(p[n:])
n += written
if err != nil {
var hangup bool
if perr, ok := err.(*os.PathError); ok {
hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO)
} else {
hangup = (err == unix.EAGAIN || err == unix.EIO)
}
// if the other end disappear, assume this is temporary and wait for the
// signal to continue again.
if hangup {
ec.writec.Wait()
continue
}
}
// unrecoverable error, break the loop and return the error
break
}
if n < len(p) && err == nil {
err = io.ErrShortWrite
}
// signal for others that we finished the write
ec.writec.Signal()
return n, err
}
// Close closed the file descriptor and signal call waiters for this fd.
// It accepts a callback which will be called with the console's fd. The
// callback typically will be used to do further cleanup such as unregister the
// console's fd from the epoll interface.
// User should call Shutdown and wait for all I/O operation to be finished
// before closing the console.
func (ec *EpollConsole) Shutdown(close func(int) error) error {
ec.readc.L.Lock()
defer ec.readc.L.Unlock()
ec.writec.L.Lock()
defer ec.writec.L.Unlock()
ec.readc.Broadcast()
ec.writec.Broadcast()
ec.closed = true
return close(ec.sysfd)
}
// signalRead signals that the console is readable.
func (ec *EpollConsole) signalRead() {
ec.readc.Signal()
}
// signalWrite signals that the console is writable.
func (ec *EpollConsole) signalWrite() {
ec.writec.Signal()
}

142
vendor/github.com/containerd/console/console_unix.go generated vendored Normal file
View file

@ -0,0 +1,142 @@
// +build darwin freebsd linux solaris
package console
import (
"os"
"golang.org/x/sys/unix"
)
// NewPty creates a new pty pair
// The master is returned as the first console and a string
// with the path to the pty slave is returned as the second
func NewPty() (Console, string, error) {
f, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, "", err
}
slave, err := ptsname(f)
if err != nil {
return nil, "", err
}
if err := unlockpt(f); err != nil {
return nil, "", err
}
m, err := newMaster(f)
if err != nil {
return nil, "", err
}
return m, slave, nil
}
type master struct {
f *os.File
original *unix.Termios
}
func (m *master) Read(b []byte) (int, error) {
return m.f.Read(b)
}
func (m *master) Write(b []byte) (int, error) {
return m.f.Write(b)
}
func (m *master) Close() error {
return m.f.Close()
}
func (m *master) Resize(ws WinSize) error {
return tcswinsz(m.f.Fd(), ws)
}
func (m *master) ResizeFrom(c Console) error {
ws, err := c.Size()
if err != nil {
return err
}
return m.Resize(ws)
}
func (m *master) Reset() error {
if m.original == nil {
return nil
}
return tcset(m.f.Fd(), m.original)
}
func (m *master) getCurrent() (unix.Termios, error) {
var termios unix.Termios
if err := tcget(m.f.Fd(), &termios); err != nil {
return unix.Termios{}, err
}
return termios, nil
}
func (m *master) SetRaw() error {
rawState, err := m.getCurrent()
if err != nil {
return err
}
rawState = cfmakeraw(rawState)
rawState.Oflag = rawState.Oflag | unix.OPOST
return tcset(m.f.Fd(), &rawState)
}
func (m *master) DisableEcho() error {
rawState, err := m.getCurrent()
if err != nil {
return err
}
rawState.Lflag = rawState.Lflag &^ unix.ECHO
return tcset(m.f.Fd(), &rawState)
}
func (m *master) Size() (WinSize, error) {
return tcgwinsz(m.f.Fd())
}
func (m *master) Fd() uintptr {
return m.f.Fd()
}
func (m *master) Name() string {
return m.f.Name()
}
// checkConsole checks if the provided file is a console
func checkConsole(f *os.File) error {
var termios unix.Termios
if tcget(f.Fd(), &termios) != nil {
return ErrNotAConsole
}
return nil
}
func newMaster(f *os.File) (Console, error) {
m := &master{
f: f,
}
t, err := m.getCurrent()
if err != nil {
return nil, err
}
m.original = &t
return m, nil
}
// ClearONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func ClearONLCR(fd uintptr) error {
return setONLCR(fd, false)
}
// SetONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts as intended for a terminal emulator.
func SetONLCR(fd uintptr) error {
return setONLCR(fd, true)
}

200
vendor/github.com/containerd/console/console_windows.go generated vendored Normal file
View file

@ -0,0 +1,200 @@
package console
import (
"fmt"
"os"
"github.com/pkg/errors"
"golang.org/x/sys/windows"
)
var (
vtInputSupported bool
ErrNotImplemented = errors.New("not implemented")
)
func (m *master) initStdios() {
m.in = windows.Handle(os.Stdin.Fd())
if err := windows.GetConsoleMode(m.in, &m.inMode); err == nil {
// Validate that windows.ENABLE_VIRTUAL_TERMINAL_INPUT is supported, but do not set it.
if err = windows.SetConsoleMode(m.in, m.inMode|windows.ENABLE_VIRTUAL_TERMINAL_INPUT); err == nil {
vtInputSupported = true
}
// Unconditionally set the console mode back even on failure because SetConsoleMode
// remembers invalid bits on input handles.
windows.SetConsoleMode(m.in, m.inMode)
} else {
fmt.Printf("failed to get console mode for stdin: %v\n", err)
}
m.out = windows.Handle(os.Stdout.Fd())
if err := windows.GetConsoleMode(m.out, &m.outMode); err == nil {
if err := windows.SetConsoleMode(m.out, m.outMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil {
m.outMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING
} else {
windows.SetConsoleMode(m.out, m.outMode)
}
} else {
fmt.Printf("failed to get console mode for stdout: %v\n", err)
}
m.err = windows.Handle(os.Stderr.Fd())
if err := windows.GetConsoleMode(m.err, &m.errMode); err == nil {
if err := windows.SetConsoleMode(m.err, m.errMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil {
m.errMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING
} else {
windows.SetConsoleMode(m.err, m.errMode)
}
} else {
fmt.Printf("failed to get console mode for stderr: %v\n", err)
}
}
type master struct {
in windows.Handle
inMode uint32
out windows.Handle
outMode uint32
err windows.Handle
errMode uint32
}
func (m *master) SetRaw() error {
if err := makeInputRaw(m.in, m.inMode); err != nil {
return err
}
// Set StdOut and StdErr to raw mode, we ignore failures since
// windows.DISABLE_NEWLINE_AUTO_RETURN might not be supported on this version of
// Windows.
windows.SetConsoleMode(m.out, m.outMode|windows.DISABLE_NEWLINE_AUTO_RETURN)
windows.SetConsoleMode(m.err, m.errMode|windows.DISABLE_NEWLINE_AUTO_RETURN)
return nil
}
func (m *master) Reset() error {
for _, s := range []struct {
fd windows.Handle
mode uint32
}{
{m.in, m.inMode},
{m.out, m.outMode},
{m.err, m.errMode},
} {
if err := windows.SetConsoleMode(s.fd, s.mode); err != nil {
return errors.Wrap(err, "unable to restore console mode")
}
}
return nil
}
func (m *master) Size() (WinSize, error) {
var info windows.ConsoleScreenBufferInfo
err := windows.GetConsoleScreenBufferInfo(m.out, &info)
if err != nil {
return WinSize{}, errors.Wrap(err, "unable to get console info")
}
winsize := WinSize{
Width: uint16(info.Window.Right - info.Window.Left + 1),
Height: uint16(info.Window.Bottom - info.Window.Top + 1),
}
return winsize, nil
}
func (m *master) Resize(ws WinSize) error {
return ErrNotImplemented
}
func (m *master) ResizeFrom(c Console) error {
return ErrNotImplemented
}
func (m *master) DisableEcho() error {
mode := m.inMode &^ windows.ENABLE_ECHO_INPUT
mode |= windows.ENABLE_PROCESSED_INPUT
mode |= windows.ENABLE_LINE_INPUT
if err := windows.SetConsoleMode(m.in, mode); err != nil {
return errors.Wrap(err, "unable to set console to disable echo")
}
return nil
}
func (m *master) Close() error {
return nil
}
func (m *master) Read(b []byte) (int, error) {
panic("not implemented on windows")
}
func (m *master) Write(b []byte) (int, error) {
panic("not implemented on windows")
}
func (m *master) Fd() uintptr {
return uintptr(m.in)
}
// on windows, console can only be made from os.Std{in,out,err}, hence there
// isnt a single name here we can use. Return a dummy "console" value in this
// case should be sufficient.
func (m *master) Name() string {
return "console"
}
// makeInputRaw puts the terminal (Windows Console) connected to the given
// file descriptor into raw mode
func makeInputRaw(fd windows.Handle, mode uint32) error {
// See
// -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx
// -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms683462(v=vs.85).aspx
// Disable these modes
mode &^= windows.ENABLE_ECHO_INPUT
mode &^= windows.ENABLE_LINE_INPUT
mode &^= windows.ENABLE_MOUSE_INPUT
mode &^= windows.ENABLE_WINDOW_INPUT
mode &^= windows.ENABLE_PROCESSED_INPUT
// Enable these modes
mode |= windows.ENABLE_EXTENDED_FLAGS
mode |= windows.ENABLE_INSERT_MODE
mode |= windows.ENABLE_QUICK_EDIT_MODE
if vtInputSupported {
mode |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT
}
if err := windows.SetConsoleMode(fd, mode); err != nil {
return errors.Wrap(err, "unable to set console to raw mode")
}
return nil
}
func checkConsole(f *os.File) error {
var mode uint32
if err := windows.GetConsoleMode(windows.Handle(f.Fd()), &mode); err != nil {
return err
}
return nil
}
func newMaster(f *os.File) (Console, error) {
if f != os.Stdin && f != os.Stdout && f != os.Stderr {
return nil, errors.New("creating a console from a file is not supported on windows")
}
m := &master{}
m.initStdios()
return m, nil
}

37
vendor/github.com/containerd/console/tc_darwin.go generated vendored Normal file
View file

@ -0,0 +1,37 @@
package console
import (
"fmt"
"os"
"unsafe"
"golang.org/x/sys/unix"
)
const (
cmdTcGet = unix.TIOCGETA
cmdTcSet = unix.TIOCSETA
)
func ioctl(fd, flag, data uintptr) error {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), unix.TIOCPTYUNLK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

29
vendor/github.com/containerd/console/tc_freebsd.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
package console
import (
"fmt"
"os"
"golang.org/x/sys/unix"
)
const (
cmdTcGet = unix.TIOCGETA
cmdTcSet = unix.TIOCSETA
)
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
// This does not exist on FreeBSD, it does not allocate controlling terminals on open
func unlockpt(f *os.File) error {
return nil
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

37
vendor/github.com/containerd/console/tc_linux.go generated vendored Normal file
View file

@ -0,0 +1,37 @@
package console
import (
"fmt"
"os"
"unsafe"
"golang.org/x/sys/unix"
)
const (
cmdTcGet = unix.TCGETS
cmdTcSet = unix.TCSETS
)
func ioctl(fd, flag, data uintptr) error {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

35
vendor/github.com/containerd/console/tc_solaris_cgo.go generated vendored Normal file
View file

@ -0,0 +1,35 @@
// +build solaris,cgo
package console
import (
"os"
"golang.org/x/sys/unix"
)
//#include <stdlib.h>
import "C"
const (
cmdTcGet = unix.TCGETS
cmdTcSet = unix.TCSETS
)
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
ptspath, err := C.ptsname(C.int(f.Fd()))
if err != nil {
return "", err
}
return C.GoString(ptspath), nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
if _, err := C.grantpt(C.int(f.Fd())); err != nil {
return err
}
return nil
}

View file

@ -0,0 +1,31 @@
// +build solaris,!cgo
//
// Implementing the functions below requires cgo support. Non-cgo stubs
// versions are defined below to enable cross-compilation of source code
// that depends on these functions, but the resultant cross-compiled
// binaries cannot actually be used. If the stub function(s) below are
// actually invoked they will display an error message and cause the
// calling process to exit.
//
package console
import (
"os"
"golang.org/x/sys/unix"
)
const (
cmdTcGet = unix.TCGETS
cmdTcSet = unix.TCSETS
)
func ptsname(f *os.File) (string, error) {
panic("ptsname() support requires cgo.")
}
func unlockpt(f *os.File) error {
panic("unlockpt() support requires cgo.")
}

75
vendor/github.com/containerd/console/tc_unix.go generated vendored Normal file
View file

@ -0,0 +1,75 @@
// +build darwin freebsd linux solaris
package console
import (
"golang.org/x/sys/unix"
)
func tcget(fd uintptr, p *unix.Termios) error {
termios, err := unix.IoctlGetTermios(int(fd), cmdTcGet)
if err != nil {
return err
}
*p = *termios
return nil
}
func tcset(fd uintptr, p *unix.Termios) error {
return unix.IoctlSetTermios(int(fd), cmdTcSet, p)
}
func tcgwinsz(fd uintptr) (WinSize, error) {
var ws WinSize
uws, err := unix.IoctlGetWinsize(int(fd), unix.TIOCGWINSZ)
if err != nil {
return ws, err
}
// Translate from unix.Winsize to console.WinSize
ws.Height = uws.Row
ws.Width = uws.Col
ws.x = uws.Xpixel
ws.y = uws.Ypixel
return ws, nil
}
func tcswinsz(fd uintptr, ws WinSize) error {
// Translate from console.WinSize to unix.Winsize
var uws unix.Winsize
uws.Row = ws.Height
uws.Col = ws.Width
uws.Xpixel = ws.x
uws.Ypixel = ws.y
return unix.IoctlSetWinsize(int(fd), unix.TIOCSWINSZ, &uws)
}
func setONLCR(fd uintptr, enable bool) error {
var termios unix.Termios
if err := tcget(fd, &termios); err != nil {
return err
}
if enable {
// Set +onlcr so we can act like a real terminal
termios.Oflag |= unix.ONLCR
} else {
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
}
return tcset(fd, &termios)
}
func cfmakeraw(t unix.Termios) unix.Termios {
t.Iflag &^= (unix.IGNBRK | unix.BRKINT | unix.PARMRK | unix.ISTRIP | unix.INLCR | unix.IGNCR | unix.ICRNL | unix.IXON)
t.Oflag &^= unix.OPOST
t.Lflag &^= (unix.ECHO | unix.ECHONL | unix.ICANON | unix.ISIG | unix.IEXTEN)
t.Cflag &^= (unix.CSIZE | unix.PARENB)
t.Cflag &^= unix.CS8
t.Cc[unix.VMIN] = 1
t.Cc[unix.VTIME] = 0
return t
}

28
vendor/github.com/cyphar/filepath-securejoin/LICENSE generated vendored Normal file
View file

@ -0,0 +1,28 @@
Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
Copyright (C) 2017 SUSE LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

65
vendor/github.com/cyphar/filepath-securejoin/README.md generated vendored Normal file
View file

@ -0,0 +1,65 @@
## `filepath-securejoin` ##
[![Build Status](https://travis-ci.org/cyphar/filepath-securejoin.svg?branch=master)](https://travis-ci.org/cyphar/filepath-securejoin)
An implementation of `SecureJoin`, a [candidate for inclusion in the Go
standard library][go#20126]. The purpose of this function is to be a "secure"
alternative to `filepath.Join`, and in particular it provides certain
guarantees that are not provided by `filepath.Join`.
This is the function prototype:
```go
func SecureJoin(root, unsafePath string) (string, error)
```
This library **guarantees** the following:
* If no error is set, the resulting string **must** be a child path of
`SecureJoin` and will not contain any symlink path components (they will all
be expanded).
* When expanding symlinks, all symlink path components **must** be resolved
relative to the provided root. In particular, this can be considered a
userspace implementation of how `chroot(2)` operates on file paths. Note that
these symlinks will **not** be expanded lexically (`filepath.Clean` is not
called on the input before processing).
* Non-existant path components are unaffected by `SecureJoin` (similar to
`filepath.EvalSymlinks`'s semantics).
* The returned path will always be `filepath.Clean`ed and thus not contain any
`..` components.
A (trivial) implementation of this function on GNU/Linux systems could be done
with the following (note that this requires root privileges and is far more
opaque than the implementation in this library, and also requires that
`readlink` is inside the `root` path):
```go
package securejoin
import (
"os/exec"
"path/filepath"
)
func SecureJoin(root, unsafePath string) (string, error) {
unsafePath = string(filepath.Separator) + unsafePath
cmd := exec.Command("chroot", root,
"readlink", "--canonicalize-missing", "--no-newline", unsafePath)
output, err := cmd.CombinedOutput()
if err != nil {
return "", err
}
expanded := string(output)
return filepath.Join(root, expanded), nil
}
```
[go#20126]: https://github.com/golang/go/issues/20126
### License ###
The license of this project is the same as Go, which is a BSD 3-clause license
available in the `LICENSE` file.

135
vendor/github.com/cyphar/filepath-securejoin/join.go generated vendored Normal file
View file

@ -0,0 +1,135 @@
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
// Copyright (C) 2017 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package securejoin is an implementation of the hopefully-soon-to-be-included
// SecureJoin helper that is meant to be part of the "path/filepath" package.
// The purpose of this project is to provide a PoC implementation to make the
// SecureJoin proposal (https://github.com/golang/go/issues/20126) more
// tangible.
package securejoin
import (
"bytes"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/pkg/errors"
)
// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been
// evaluated in attempting to securely join the two given paths.
var ErrSymlinkLoop = fmt.Errorf("SecureJoin: too many links")
// IsNotExist tells you if err is an error that implies that either the path
// accessed does not exist (or path components don't exist). This is
// effectively a more broad version of os.IsNotExist.
func IsNotExist(err error) bool {
// If it's a bone-fide ENOENT just bail.
if os.IsNotExist(errors.Cause(err)) {
return true
}
// Check that it's not actually an ENOTDIR, which in some cases is a more
// convoluted case of ENOENT (usually involving weird paths).
var errno error
switch err := errors.Cause(err).(type) {
case *os.PathError:
errno = err.Err
case *os.LinkError:
errno = err.Err
case *os.SyscallError:
errno = err.Err
}
return errno == syscall.ENOTDIR || errno == syscall.ENOENT
}
// SecureJoinVFS joins the two given path components (similar to Join) except
// that the returned path is guaranteed to be scoped inside the provided root
// path (when evaluated). Any symbolic links in the path are evaluated with the
// given root treated as the root of the filesystem, similar to a chroot. The
// filesystem state is evaluated through the given VFS interface (if nil, the
// standard os.* family of functions are used).
//
// Note that the guarantees provided by this function only apply if the path
// components in the returned string are not modified (in other words are not
// replaced with symlinks on the filesystem) after this function has returned.
// Such a symlink race is necessarily out-of-scope of SecureJoin.
func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
// Use the os.* VFS implementation if none was specified.
if vfs == nil {
vfs = osVFS{}
}
var path bytes.Buffer
n := 0
for unsafePath != "" {
if n > 255 {
return "", ErrSymlinkLoop
}
// Next path component, p.
i := strings.IndexRune(unsafePath, filepath.Separator)
var p string
if i == -1 {
p, unsafePath = unsafePath, ""
} else {
p, unsafePath = unsafePath[:i], unsafePath[i+1:]
}
// Create a cleaned path, using the lexical semantics of /../a, to
// create a "scoped" path component which can safely be joined to fullP
// for evaluation. At this point, path.String() doesn't contain any
// symlink components.
cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p)
if cleanP == string(filepath.Separator) {
path.Reset()
continue
}
fullP := filepath.Clean(root + cleanP)
// Figure out whether the path is a symlink.
fi, err := vfs.Lstat(fullP)
if err != nil && !IsNotExist(err) {
return "", err
}
// Treat non-existent path components the same as non-symlinks (we
// can't do any better here).
if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
path.WriteString(p)
path.WriteRune(filepath.Separator)
continue
}
// Only increment when we actually dereference a link.
n++
// It's a symlink, expand it by prepending it to the yet-unparsed path.
dest, err := vfs.Readlink(fullP)
if err != nil {
return "", err
}
// Absolute symlinks reset any work we've already done.
if filepath.IsAbs(dest) {
path.Reset()
}
unsafePath = dest + string(filepath.Separator) + unsafePath
}
// We have to clean path.String() here because it may contain '..'
// components that are entirely lexical, but would be misleading otherwise.
// And finally do a final clean to ensure that root is also lexically
// clean.
fullP := filepath.Clean(string(filepath.Separator) + path.String())
return filepath.Clean(root + fullP), nil
}
// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library
// of functions as the VFS. If in doubt, use this function over SecureJoinVFS.
func SecureJoin(root, unsafePath string) (string, error) {
return SecureJoinVFS(root, unsafePath, nil)
}

View file

@ -0,0 +1 @@
github.com/pkg/errors v0.8.0

41
vendor/github.com/cyphar/filepath-securejoin/vfs.go generated vendored Normal file
View file

@ -0,0 +1,41 @@
// Copyright (C) 2017 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import "os"
// In future this should be moved into a separate package, because now there
// are several projects (umoci and go-mtree) that are using this sort of
// interface.
// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is
// equivalent to using the standard os.* family of functions. This is mainly
// used for the purposes of mock testing, but also can be used to otherwise use
// SecureJoin with VFS-like system.
type VFS interface {
// Lstat returns a FileInfo describing the named file. If the file is a
// symbolic link, the returned FileInfo describes the symbolic link. Lstat
// makes no attempt to follow the link. These semantics are identical to
// os.Lstat.
Lstat(name string) (os.FileInfo, error)
// Readlink returns the destination of the named symbolic link. These
// semantics are identical to os.Readlink.
Readlink(name string) (string, error)
}
// osVFS is the "nil" VFS, in that it just passes everything through to the os
// module.
type osVFS struct{}
// Lstat returns a FileInfo describing the named file. If the file is a
// symbolic link, the returned FileInfo describes the symbolic link. Lstat
// makes no attempt to follow the link. These semantics are identical to
// os.Lstat.
func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
// Readlink returns the destination of the named symbolic link. These
// semantics are identical to os.Readlink.
func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }

View file

@ -56,7 +56,7 @@ make BUILDTAGS='seccomp apparmor'
|-----------|------------------------------------|-------------| |-----------|------------------------------------|-------------|
| seccomp | Syscall filtering | libseccomp | | seccomp | Syscall filtering | libseccomp |
| selinux | selinux process and mount labeling | <none> | | selinux | selinux process and mount labeling | <none> |
| apparmor | apparmor profile support | libapparmor | | apparmor | apparmor profile support | <none> |
| ambient | ambient capability support | kernel 4.3 | | ambient | ambient capability support | kernel 4.3 |

View file

@ -2,15 +2,10 @@
package apparmor package apparmor
// #cgo LDFLAGS: -lapparmor
// #include <sys/apparmor.h>
// #include <stdlib.h>
import "C"
import ( import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"unsafe"
) )
// IsEnabled returns true if apparmor is enabled for the host. // IsEnabled returns true if apparmor is enabled for the host.
@ -24,16 +19,36 @@ func IsEnabled() bool {
return false return false
} }
func setprocattr(attr, value string) error {
// Under AppArmor you can only change your own attr, so use /proc/self/
// instead of /proc/<tid>/ like libapparmor does
path := fmt.Sprintf("/proc/self/attr/%s", attr)
f, err := os.OpenFile(path, os.O_WRONLY, 0)
if err != nil {
return err
}
defer f.Close()
_, err = fmt.Fprintf(f, "%s", value)
return err
}
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
func changeOnExec(name string) error {
value := "exec " + name
if err := setprocattr("exec", value); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
}
// ApplyProfile will apply the profile with the specified name to the process after // ApplyProfile will apply the profile with the specified name to the process after
// the next exec. // the next exec.
func ApplyProfile(name string) error { func ApplyProfile(name string) error {
if name == "" { if name == "" {
return nil return nil
} }
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName)) return changeOnExec(name)
if _, err := C.aa_change_onexec(cName); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
} }

View file

@ -145,8 +145,17 @@ func (m *Manager) Apply(pid int) (err error) {
m.Paths[sys.Name()] = p m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil { if err := sys.Apply(d); err != nil {
if os.IsPermission(err) && m.Cgroups.Path == "" {
// If we didn't set a cgroup path, then let's defer the error here
// until we know whether we have set limits or not.
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
// it will have the same limits as its parent.
delete(m.Paths, sys.Name())
continue
}
return err return err
} }
} }
return nil return nil
} }
@ -198,6 +207,10 @@ func (m *Manager) Set(container *configs.Config) error {
for _, sys := range subsystems { for _, sys := range subsystems {
path := paths[sys.Name()] path := paths[sys.Name()]
if err := sys.Set(path, container.Cgroups); err != nil { if err := sys.Set(path, container.Cgroups); err != nil {
if path == "" {
// cgroup never applied
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
}
return err return err
} }
} }

View file

@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Resources.Freezer { switch cgroup.Resources.Freezer {
case configs.Frozen, configs.Thawed: case configs.Frozen, configs.Thawed:
for {
// In case this loop does not exit because it doesn't get the expected
// state, let's write again this state, hoping it's going to be properly
// set this time. Otherwise, this loop could run infinitely, waiting for
// a state change that would never happen.
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
return err return err
} }
for {
state, err := readFile(path, "freezer.state") state, err := readFile(path, "freezer.state")
if err != nil { if err != nil {
return err return err
@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
break break
} }
time.Sleep(1 * time.Millisecond) time.Sleep(1 * time.Millisecond)
} }
case configs.Undefined: case configs.Undefined:

View file

@ -1,128 +0,0 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}
// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}
// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}
paths[name] = path
}
m.Paths = paths
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
}
func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}

View file

@ -1,4 +1,4 @@
// +build !linux // +build !linux static_build
package systemd package systemd
@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
} }
func (m *Manager) Set(container *configs.Config) error { func (m *Manager) Set(container *configs.Config) error {
return nil, fmt.Errorf("Systemd not supported") return fmt.Errorf("Systemd not supported")
} }
func (m *Manager) Freeze(state configs.FreezerState) error { func (m *Manager) Freeze(state configs.FreezerState) error {

View file

@ -1,4 +1,4 @@
// +build linux // +build linux,!static_build
package systemd package systemd
@ -260,7 +260,7 @@ func (m *Manager) Apply(pid int) error {
if c.Resources.Memory != 0 { if c.Resources.Memory != 0 {
properties = append(properties, properties = append(properties,
newProp("MemoryLimit", c.Resources.Memory)) newProp("MemoryLimit", uint64(c.Resources.Memory)))
} }
if c.Resources.CpuShares != 0 { if c.Resources.CpuShares != 0 {
@ -271,6 +271,13 @@ func (m *Manager) Apply(pid int) error {
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
properties = append(properties, properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
} }
@ -288,10 +295,13 @@ func (m *Manager) Apply(pid int) error {
} }
} }
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) { statusChan := make(chan string)
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
return err return err
} }
<-statusChan
if err := joinCgroups(c, pid); err != nil { if err := joinCgroups(c, pid); err != nil {
return err return err
} }

View file

@ -1,10 +0,0 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
}

View file

@ -1,6 +0,0 @@
// +build !windows,!linux,!freebsd
package configs
type Cgroup struct {
}

View file

@ -187,6 +187,10 @@ type Config struct {
// Rootless specifies whether the container is a rootless container. // Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"` Rootless bool `json:"rootless"`
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
} }
type Hooks struct { type Hooks struct {

View file

@ -1,4 +1,4 @@
// +build linux freebsd // +build linux
package configs package configs

View file

@ -0,0 +1,7 @@
package configs
type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}

View file

@ -21,13 +21,6 @@ func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMount(config); err != nil { if err := rootlessMount(config); err != nil {
return err return err
} }
// Currently, cgroups cannot effectively be used in rootless containers.
// The new cgroup namespace doesn't really help us either because it doesn't
// have nice interactions with the user namespace (we're working with upstream
// to fix this).
if err := rootlessCgroup(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because // XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this // configs.Config doesn't store the user-related configs. So this
@ -36,37 +29,27 @@ func (v *ConfigValidator) rootless(config *configs.Config) error {
return nil return nil
} }
func rootlessMappings(config *configs.Config) error { func hasIDMapping(id int, mappings []configs.IDMap) bool {
rootuid, err := config.HostRootUID() for _, m := range mappings {
if err != nil { if id >= m.ContainerID && id < m.ContainerID+m.Size {
return fmt.Errorf("failed to get root uid from uidMappings: %v", err) return true
} }
}
return false
}
func rootlessMappings(config *configs.Config) error {
if euid := geteuid(); euid != 0 { if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) { if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces") return fmt.Errorf("rootless containers require user namespaces")
} }
if rootuid != euid {
return fmt.Errorf("rootless containers cannot map container root to a different host user")
}
} }
rootgid, err := config.HostRootGID() if len(config.UidMappings) == 0 {
if err != nil { return fmt.Errorf("rootless containers requires at least one UID mapping")
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
} }
if len(config.GidMappings) == 0 {
// Similar to the above test, we need to make sure that we aren't trying to return fmt.Errorf("rootless containers requires at least one UID mapping")
// map to a group ID that we don't have the right to be.
if rootgid != getegid() {
return fmt.Errorf("rootless containers cannot map container root to a different host group")
}
// We can only map one user and group inside a container (our own).
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one user")
}
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one group")
} }
return nil return nil
@ -104,11 +87,28 @@ func rootlessMount(config *configs.Config) error {
// Check that the options list doesn't contain any uid= or gid= entries // Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root. // that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") { for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" { if strings.HasPrefix(opt, "uid=") {
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0") var uid int
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(uid, config.UidMappings) {
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
}
}
if strings.HasPrefix(opt, "gid=") {
var gid int
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(gid, config.GidMappings) {
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
} }
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
} }
} }
} }

View file

@ -7,6 +7,7 @@ import (
"strings" "strings"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux" selinux "github.com/opencontainers/selinux/go-selinux"
) )
@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil { if err := v.sysctl(config); err != nil {
return err return err
} }
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless { if config.Rootless {
if err := v.rootless(config); err != nil { if err := v.rootless(config); err != nil {
return err return err
@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
return nil return nil
} }
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
}
return nil
}
func isSymbolicLink(path string) (bool, error) { func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path) fi, err := os.Lstat(path)
if err != nil { if err != nil {

View file

@ -1,17 +0,0 @@
package libcontainer
import (
"io"
"os"
)
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriteCloser
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
File() *os.File
}

View file

@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View file

@ -1,71 +1,14 @@
package libcontainer package libcontainer
import ( import (
"fmt"
"os" "os"
"unsafe"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
func ConsoleFromFile(f *os.File) Console {
return &linuxConsole{
master: f,
}
}
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) File() *os.File {
return c.master
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label // mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console. // and applying the correct ownership of the console.
func (c *linuxConsole) mount() error { func mountConsole(slavePath string) error {
oldMask := unix.Umask(0000) oldMask := unix.Umask(0000)
defer unix.Umask(oldMask) defer unix.Umask(oldMask)
f, err := os.Create("/dev/console") f, err := os.Create("/dev/console")
@ -75,17 +18,20 @@ func (c *linuxConsole) mount() error {
if f != nil { if f != nil {
f.Close() f.Close()
} }
return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "") return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
} }
// dupStdio opens the slavePath for the console and dups the fds to the current // dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2. // processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error { func dupStdio(slavePath string) error {
slave, err := c.open(unix.O_RDWR) fd, err := unix.Open(slavePath, unix.O_RDWR, 0)
if err != nil { if err != nil {
return err return &os.PathError{
Op: "open",
Path: slavePath,
Err: err,
}
} }
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} { for _, i := range []int{0, 1, 2} {
if err := unix.Dup3(fd, i, 0); err != nil { if err := unix.Dup3(fd, i, 0); err != nil {
return err return err
@ -93,60 +39,3 @@ func (c *linuxConsole) dupStdio() error {
} }
return nil return nil
} }
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := unix.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}
// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func SaneTerminal(terminal *os.File) error {
termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS)
if err != nil {
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
}
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil {
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
}
return nil
}

View file

@ -1,11 +0,0 @@
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View file

@ -1,30 +0,0 @@
package libcontainer
// newConsole returns an initialized console that can be used within a container
func newConsole() (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows pseudo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View file

@ -5,6 +5,7 @@ package libcontainer
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
@ -21,6 +22,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc" "github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
@ -38,10 +40,14 @@ type linuxContainer struct {
root string root string
config *configs.Config config *configs.Config
cgroupManager cgroups.Manager cgroupManager cgroups.Manager
intelRdtManager intelrdt.Manager
initPath string
initArgs []string initArgs []string
initProcess parentProcess initProcess parentProcess
initProcessStartTime uint64 initProcessStartTime uint64
criuPath string criuPath string
newuidmapPath string
newgidmapPath string
m sync.Mutex m sync.Mutex
criuVersion int criuVersion int
state containerState state containerState
@ -67,6 +73,9 @@ type State struct {
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"` ExternalDescriptors []string `json:"external_descriptors,omitempty"`
// Intel RDT "resource control" filesystem path
IntelRdtPath string `json:"intel_rdt_path"`
} }
// Container is a libcontainer container object. // Container is a libcontainer container object.
@ -163,6 +172,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
} }
if c.intelRdtManager != nil {
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
}
}
for _, iface := range c.config.Networks { for _, iface := range c.config.Networks {
switch iface.Type { switch iface.Type {
case "veth": case "veth":
@ -186,8 +200,26 @@ func (c *linuxContainer) Set(config configs.Config) error {
if status == Stopped { if status == Stopped {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
} }
if err := c.cgroupManager.Set(&config); err != nil {
// Set configs back
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
}
return err
}
if c.intelRdtManager != nil {
if err := c.intelRdtManager.Set(&config); err != nil {
// Set configs back
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
}
return err
}
}
// After config setting succeed, update config and states
c.config = &config c.config = &config
return c.cgroupManager.Set(c.config) _, err = c.updateState(nil)
return err
} }
func (c *linuxContainer) Start(process *Process) error { func (c *linuxContainer) Start(process *Process) error {
@ -236,20 +268,71 @@ func (c *linuxContainer) Exec() error {
func (c *linuxContainer) exec() error { func (c *linuxContainer) exec() error {
path := filepath.Join(c.root, execFifoFilename) path := filepath.Join(c.root, execFifoFilename)
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil { fifoOpen := make(chan struct{})
return newSystemErrorWithCause(err, "open exec fifo for reading") select {
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
return errors.New("container process is already dead")
case result := <-awaitFifoOpen(path):
close(fifoOpen)
if result.err != nil {
return result.err
} }
f := result.file
defer f.Close() defer f.Close()
data, err := ioutil.ReadAll(f) if err := readFromExecFifo(f); err != nil {
return err
}
return os.Remove(path)
}
}
func readFromExecFifo(execFifo io.Reader) error {
data, err := ioutil.ReadAll(execFifo)
if err != nil { if err != nil {
return err return err
} }
if len(data) > 0 { if len(data) <= 0 {
os.Remove(path)
return nil
}
return fmt.Errorf("cannot start an already running container") return fmt.Errorf("cannot start an already running container")
}
return nil
}
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
isDead := make(chan struct{})
go func() {
for {
select {
case <-exit:
return
case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
close(isDead)
return
}
}
}
}()
return isDead
}
func awaitFifoOpen(path string) <-chan openResult {
fifoOpened := make(chan openResult)
go func() {
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil {
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
return
}
fifoOpened <- openResult{file: f}
}()
return fifoOpened
}
type openResult struct {
file *os.File
err error
} }
func (c *linuxContainer) start(process *Process, isInit bool) error { func (c *linuxContainer) start(process *Process, isInit bool) error {
@ -259,7 +342,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
} }
if err := parent.start(); err != nil { if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped. // terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil { if err := ignoreTerminateErrors(parent.terminate()); err != nil {
logrus.Warn(err) logrus.Warn(err)
} }
return newSystemErrorWithCause(err, "starting container process") return newSystemErrorWithCause(err, "starting container process")
@ -277,15 +360,17 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
c.initProcessStartTime = state.InitProcessStartTime c.initProcessStartTime = state.InitProcessStartTime
if c.config.Hooks != nil { if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{ s := configs.HookState{
Version: c.config.Version, Version: c.config.Version,
ID: c.id, ID: c.id,
Pid: parent.pid(), Pid: parent.pid(),
Bundle: utils.SearchLabels(c.config.Labels, "bundle"), Bundle: bundle,
Annotations: annotations,
} }
for i, hook := range c.config.Hooks.Poststart { for i, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil { if err := hook.Run(s); err != nil {
if err := parent.terminate(); err != nil { if err := ignoreTerminateErrors(parent.terminate()); err != nil {
logrus.Warn(err) logrus.Warn(err)
} }
return newSystemErrorWithCausef(err, "running poststart hook %d", i) return newSystemErrorWithCausef(err, "running poststart hook %d", i)
@ -341,6 +426,23 @@ func (c *linuxContainer) deleteExecFifo() {
os.Remove(fifoName) os.Remove(fifoName)
} }
// includeExecFifo opens the container's execfifo as a pathfd, so that the
// container cannot access the statedir (and the FIFO itself remains
// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
fifoName := filepath.Join(c.root, execFifoFilename)
fifoFd, err := unix.Open(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return err
}
cmd.ExtraFiles = append(cmd.ExtraFiles, os.NewFile(uintptr(fifoFd), fifoName))
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_FIFOFD=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
return nil
}
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := utils.NewSockPair("init") parentPipe, childPipe, err := utils.NewSockPair("init")
if err != nil { if err != nil {
@ -354,22 +456,20 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
return c.newSetnsProcess(p, cmd, parentPipe, childPipe) return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
} }
// We only set up rootDir if we're not doing a `runc exec`. The reason for // We only set up fifoFd if we're not doing a `runc exec`. The historic
// this is to avoid cases where a racing, unprivileged process inside the // reason for this is that previously we would pass a dirfd that allowed
// container can get access to the statedir file descriptor (which would // for container rootfs escape (and not doing it in `runc exec` avoided
// allow for container rootfs escape). // that problem), but we no longer do that. However, there's no need to do
rootDir, err := os.Open(c.root) // this for `runc exec` so we just keep it this way to be safe.
if err != nil { if err := c.includeExecFifo(cmd); err != nil {
return nil, err return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
} }
cmd.ExtraFiles = append(cmd.ExtraFiles, rootDir) return c.newInitProcess(p, cmd, parentPipe, childPipe)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
} }
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) cmd := exec.Command(c.initPath, c.initArgs[1:]...)
cmd.Args[0] = c.initArgs[0]
cmd.Stdin = p.Stdin cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout cmd.Stdout = p.Stdout
cmd.Stderr = p.Stderr cmd.Stderr = p.Stderr
@ -397,7 +497,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
return cmd, nil return cmd, nil
} }
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) { func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string) nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces { for _, ns := range c.config.Namespaces {
@ -415,12 +515,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
childPipe: childPipe, childPipe: childPipe,
parentPipe: parentPipe, parentPipe: parentPipe,
manager: c.cgroupManager, manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p), config: c.newInitConfig(p),
container: c, container: c,
process: p, process: p,
bootstrapData: data, bootstrapData: data,
sharePidns: sharePidns, sharePidns: sharePidns,
rootDir: rootDir,
}, nil }, nil
} }
@ -439,6 +539,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
return &setnsProcess{ return &setnsProcess{
cmd: cmd, cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(), cgroupPaths: c.cgroupManager.GetPaths(),
intelRdtPath: state.IntelRdtPath,
childPipe: childPipe, childPipe: childPipe,
parentPipe: parentPipe, parentPipe: parentPipe,
config: c.newInitConfig(p), config: c.newInitConfig(p),
@ -477,6 +578,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
cfg.Rlimits = process.Rlimits cfg.Rlimits = process.Rlimits
} }
cfg.CreateConsole = process.ConsoleSocket != nil cfg.CreateConsole = process.ConsoleSocket != nil
cfg.ConsoleWidth = process.ConsoleWidth
cfg.ConsoleHeight = process.ConsoleHeight
return cfg return cfg
} }
@ -578,10 +681,25 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
logrus.Debugf("Feature check says: %s", criuFeatures) logrus.Debugf("Feature check says: %s", criuFeatures)
missingFeatures := false missingFeatures := false
// The outer if checks if the fields actually exist
if (criuFeat.MemTrack != nil) &&
(criuFeatures.MemTrack != nil) {
// The inner if checks if they are set to true
if *criuFeat.MemTrack && !*criuFeatures.MemTrack { if *criuFeat.MemTrack && !*criuFeatures.MemTrack {
missingFeatures = true missingFeatures = true
logrus.Debugf("CRIU does not support MemTrack") logrus.Debugf("CRIU does not support MemTrack")
} }
}
// This needs to be repeated for every new feature check.
// Is there a way to put this in a function. Reflection?
if (criuFeat.LazyPages != nil) &&
(criuFeatures.LazyPages != nil) {
if *criuFeat.LazyPages && !*criuFeatures.LazyPages {
missingFeatures = true
logrus.Debugf("CRIU does not support LazyPages")
}
}
if missingFeatures { if missingFeatures {
return fmt.Errorf("CRIU is missing features") return fmt.Errorf("CRIU is missing features")
@ -610,9 +728,9 @@ func parseCriuVersion(path string) (int, error) {
return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path) return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
} }
n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
if err != nil { if err != nil {
n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6 n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
y++ y++
} else { } else {
z++ z++
@ -736,6 +854,25 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
} }
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
} }
return nil
}
func waitForCriuLazyServer(r *os.File, status string) error {
data := make([]byte, 1)
_, err := r.Read(data)
if err != nil {
return err
}
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
if err != nil {
return err
}
_, err = fd.Write(data)
if err != nil {
return err
}
fd.Close()
return nil return nil
} }
@ -802,6 +939,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
FileLocks: proto.Bool(criuOpts.FileLocks), FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs), EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true), OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
} }
fcg := c.cgroupManager.GetPaths()["freezer"] fcg := c.cgroupManager.GetPaths()["freezer"]
@ -852,6 +991,24 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
Opts: &rpcOpts, Opts: &rpcOpts,
} }
if criuOpts.LazyPages {
// lazy migration requested; check if criu supports it
feat := criurpc.CriuFeatures{
LazyPages: proto.Bool(true),
}
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
return err
}
statusRead, statusWrite, err := os.Pipe()
if err != nil {
return err
}
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
}
//no need to dump these information in pre-dump //no need to dump these information in pre-dump
if !criuOpts.PreDump { if !criuOpts.PreDump {
for _, m := range c.config.Mounts { for _, m := range c.config.Mounts {
@ -1003,6 +1160,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
FileLocks: proto.Bool(criuOpts.FileLocks), FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs), EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true), OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
}, },
} }
@ -1331,11 +1490,13 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
} }
case notify.GetScript() == "setup-namespaces": case notify.GetScript() == "setup-namespaces":
if c.config.Hooks != nil { if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{ s := configs.HookState{
Version: c.config.Version, Version: c.config.Version,
ID: c.id, ID: c.id,
Pid: int(notify.GetPid()), Pid: int(notify.GetPid()),
Bundle: utils.SearchLabels(c.config.Labels, "bundle"), Bundle: bundle,
Annotations: annotations,
} }
for i, hook := range c.config.Hooks.Prestart { for i, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil { if err := hook.Run(s); err != nil {
@ -1380,7 +1541,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
defer master.Close() defer master.Close()
// While we can access console.master, using the API is a good idea. // While we can access console.master, using the API is a good idea.
if err := utils.SendFd(process.ConsoleSocket, master); err != nil { if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
return err return err
} }
} }
@ -1388,7 +1549,9 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
} }
func (c *linuxContainer) updateState(process parentProcess) (*State, error) { func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
if process != nil {
c.initProcess = process c.initProcess = process
}
state, err := c.currentState() state, err := c.currentState()
if err != nil { if err != nil {
return nil, err return nil, err
@ -1493,6 +1656,10 @@ func (c *linuxContainer) currentState() (*State, error) {
startTime, _ = c.initProcess.startTime() startTime, _ = c.initProcess.startTime()
externalDescriptors = c.initProcess.externalDescriptors() externalDescriptors = c.initProcess.externalDescriptors()
} }
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
if err != nil {
intelRdtPath = ""
}
state := &State{ state := &State{
BaseState: BaseState{ BaseState: BaseState{
ID: c.ID(), ID: c.ID(),
@ -1503,6 +1670,7 @@ func (c *linuxContainer) currentState() (*State, error) {
}, },
Rootless: c.config.Rootless, Rootless: c.config.Rootless,
CgroupPaths: c.cgroupManager.GetPaths(), CgroupPaths: c.cgroupManager.GetPaths(),
IntelRdtPath: intelRdtPath,
NamespacePaths: make(map[configs.NamespaceType]string), NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors, ExternalDescriptors: externalDescriptors,
} }
@ -1601,6 +1769,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
if !joinExistingUser { if !joinExistingUser {
// write uid mappings // write uid mappings
if len(c.config.UidMappings) > 0 { if len(c.config.UidMappings) > 0 {
if c.config.Rootless && c.newuidmapPath != "" {
r.AddData(&Bytemsg{
Type: UidmapPathAttr,
Value: []byte(c.newuidmapPath),
})
}
b, err := encodeIDMapping(c.config.UidMappings) b, err := encodeIDMapping(c.config.UidMappings)
if err != nil { if err != nil {
return nil, err return nil, err
@ -1621,6 +1795,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Type: GidmapAttr, Type: GidmapAttr,
Value: b, Value: b,
}) })
if c.config.Rootless && c.newgidmapPath != "" {
r.AddData(&Bytemsg{
Type: GidmapPathAttr,
Value: []byte(c.newgidmapPath),
})
}
// The following only applies if we are root. // The following only applies if we are root.
if !c.config.Rootless { if !c.config.Rootless {
// check if we have CAP_SETGID to setgroup properly // check if we have CAP_SETGID to setgroup properly
@ -1652,3 +1832,18 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
return bytes.NewReader(r.Serialize()), nil return bytes.NewReader(r.Serialize()), nil
} }
// ignoreTerminateErrors returns nil if the given err matches an error known
// to indicate that the terminate occurred successfully or err was nil, otherwise
// err is returned unaltered.
func ignoreTerminateErrors(err error) error {
if err == nil {
return nil
}
s := err.Error()
switch {
case strings.Contains(s, "process already finished"), strings.Contains(s, "Wait was already called"):
return nil
}
return err
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -23,7 +23,7 @@ type VethPairName struct {
type CriuOpts struct { type CriuOpts struct {
ImagesDirectory string // directory for storing image files ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // direcotry for storing parent image files in pre-dump and dump ParentImage string // directory for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections ExternalUnixConnections bool // allow external unix connections
@ -34,4 +34,7 @@ type CriuOpts struct {
VethPairs []VethPairName // pass the veth to criu when restore VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd string // fd for feedback when lazy server is ready
} }

View file

@ -1,6 +0,0 @@
package libcontainer
// TODO Windows: This can ultimately be entirely factored out as criu is
// a Unix concept not relevant on Windows.
type CriuOpts struct {
}

View file

@ -28,6 +28,15 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
var (
devNumber = stat.Rdev
major = unix.Major(devNumber)
)
if major == 0 {
return nil, ErrNotADevice
}
var ( var (
devType rune devType rune
mode = stat.Mode mode = stat.Mode
@ -37,21 +46,16 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
devType = 'b' devType = 'b'
case mode&unix.S_IFCHR == unix.S_IFCHR: case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = 'c' devType = 'c'
default:
return nil, ErrNotADevice
} }
devNumber := int(stat.Rdev)
uid := stat.Uid
gid := stat.Gid
return &configs.Device{ return &configs.Device{
Type: devType, Type: devType,
Path: path, Path: path,
Major: Major(devNumber), Major: int64(major),
Minor: Minor(devNumber), Minor: int64(unix.Minor(devNumber)),
Permissions: permissions, Permissions: permissions,
FileMode: os.FileMode(mode), FileMode: os.FileMode(mode),
Uid: uid, Uid: stat.Uid,
Gid: gid, Gid: stat.Gid,
}, nil }, nil
} }

View file

@ -1,3 +0,0 @@
// +build !linux
package devices

View file

@ -1,24 +0,0 @@
// +build linux freebsd
package devices
/*
This code provides support for manipulating linux device numbers. It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved.
You can read what they are here:
- http://www.makelinux.net/ldd3/chp-3-sect-2
- http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94
Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9
*/
func Major(devNumber int) int64 {
return int64((devNumber >> 8) & 0xfff)
}
func Minor(devNumber int) int64 {
return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
}

View file

@ -11,13 +11,13 @@ import (
"runtime/debug" "runtime/debug"
"strconv" "strconv"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
@ -72,15 +72,15 @@ func Cgroupfs(l *LinuxFactory) error {
return nil return nil
} }
// RootlessCgroups is an options func to configure a LinuxFactory to // IntelRdtfs is an options func to configure a LinuxFactory to return
// return containers that use the "rootless" cgroup manager, which will // containers that use the Intel RDT "resource control" filesystem to
// fail to do any operations not possible to do with an unprivileged user. // create and manage Intel Xeon platform shared resources (e.g., L3 cache).
// It should only be used in conjunction with rootless containers. func IntelRdtFs(l *LinuxFactory) error {
func RootlessCgroups(l *LinuxFactory) error { l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &intelrdt.IntelRdtManager{
return &rootless.Manager{ Config: config,
Cgroups: config, Id: id,
Paths: paths, Path: path,
} }
} }
return nil return nil
@ -119,12 +119,16 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
} }
l := &LinuxFactory{ l := &LinuxFactory{
Root: root, Root: root,
InitArgs: []string{"/proc/self/exe", "init"}, InitPath: "/proc/self/exe",
InitArgs: []string{os.Args[0], "init"},
Validator: validate.New(), Validator: validate.New(),
CriuPath: "criu", CriuPath: "criu",
} }
Cgroupfs(l) Cgroupfs(l)
for _, opt := range options { for _, opt := range options {
if opt == nil {
continue
}
if err := opt(l); err != nil { if err := opt(l); err != nil {
return nil, err return nil, err
} }
@ -137,6 +141,10 @@ type LinuxFactory struct {
// Root directory for the factory to store state. // Root directory for the factory to store state.
Root string Root string
// InitPath is the path for calling the init responsibilities for spawning
// a container.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning // InitArgs are arguments for calling the init responsibilities for spawning
// a container. // a container.
InitArgs []string InitArgs []string
@ -145,11 +153,19 @@ type LinuxFactory struct {
// containers. // containers.
CriuPath string CriuPath string
// New{u,g}uidmapPath is the path to the binaries used for mapping with
// rootless containers.
NewuidmapPath string
NewgidmapPath string
// Validator provides validation to container configurations. // Validator provides validation to container configurations.
Validator validate.Validator Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container. // NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
} }
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@ -174,17 +190,20 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError) return nil, newGenericError(err, SystemError)
} }
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
id: id, id: id,
root: containerRoot, root: containerRoot,
config: config, config: config,
initPath: l.InitPath,
initArgs: l.InitArgs, initArgs: l.InitArgs,
criuPath: l.CriuPath, criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
} }
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c} c.state = &stoppedState{c: c}
return c, nil return c, nil
} }
@ -203,17 +222,16 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime, processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors, fds: state.ExternalDescriptors,
} }
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
initProcess: r, initProcess: r,
initProcessStartTime: state.InitProcessStartTime, initProcessStartTime: state.InitProcessStartTime,
id: id, id: id,
config: &state.Config, config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs, initArgs: l.InitArgs,
criuPath: l.CriuPath, criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot, root: containerRoot,
created: state.Created, created: state.Created,
@ -222,6 +240,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err := c.refreshState(); err != nil { if err := c.refreshState(); err != nil {
return nil, err return nil, err
} }
if intelrdt.IsEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
}
return c, nil return c, nil
} }
@ -233,10 +254,10 @@ func (l *LinuxFactory) Type() string {
// This is a low level implementation detail of the reexec and should not be consumed externally // This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) { func (l *LinuxFactory) StartInitialization() (err error) {
var ( var (
pipefd, rootfd int pipefd, fifofd int
consoleSocket *os.File consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE") envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR") envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE") envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
) )
@ -252,11 +273,11 @@ func (l *LinuxFactory) StartInitialization() (err error) {
) )
defer pipe.Close() defer pipe.Close()
// Only init processes have STATEDIR. // Only init processes have FIFOFD.
rootfd = -1 fifofd = -1
if it == initStandard { if it == initStandard {
if rootfd, err = strconv.Atoi(envStateDir); err != nil { if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err) return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
} }
} }
@ -291,7 +312,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
} }
}() }()
i, err := newContainerInit(it, pipe, consoleSocket, rootfd) i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
if err != nil { if err != nil {
return err return err
} }
@ -323,3 +344,21 @@ func (l *LinuxFactory) validateID(id string) error {
return nil return nil
} }
// NewuidmapPath returns an option func to configure a LinuxFactory with the
// provided ..
func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewuidmapPath = newuidmapPath
return nil
}
}
// NewgidmapPath returns an option func to configure a LinuxFactory with the
// provided ..
func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.NewgidmapPath = newgidmapPath
return nil
}
}

View file

@ -12,15 +12,16 @@ import (
"syscall" // only for Errno "syscall" // only for Errno
"unsafe" "unsafe"
"golang.org/x/sys/unix"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/vishvananda/netlink" "github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
) )
type initType string type initType string
@ -32,6 +33,7 @@ const (
type pid struct { type pid struct {
Pid int `json:"pid"` Pid int `json:"pid"`
PidFirstChild int `json:"pid_first"`
} }
// network is an internal struct used to setup container networks. // network is an internal struct used to setup container networks.
@ -60,6 +62,8 @@ type initConfig struct {
ContainerId string `json:"containerid"` ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"` Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"` CreateConsole bool `json:"create_console"`
ConsoleWidth uint16 `json:"console_width"`
ConsoleHeight uint16 `json:"console_height"`
Rootless bool `json:"rootless"` Rootless bool `json:"rootless"`
} }
@ -67,7 +71,7 @@ type initer interface {
Init() error Init() error
} }
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDirFD int) (initer, error) { func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
var config *initConfig var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil { if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err return nil, err
@ -88,7 +92,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDi
consoleSocket: consoleSocket, consoleSocket: consoleSocket,
parentPid: unix.Getppid(), parentPid: unix.Getppid(),
config: config, config: config,
stateDirFD: stateDirFD, fifoFd: fifoFd,
}, nil }, nil
} }
return nil, fmt.Errorf("unknown init type %q", t) return nil, fmt.Errorf("unknown init type %q", t)
@ -169,29 +173,38 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
// however, that setupUser (specifically fixStdioPermissions) *will* change // however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so // the UID owner of the console to be the user the process will run as (so
// they can actually control their console). // they can actually control their console).
console, err := newConsole()
pty, slavePath, err := console.NewPty()
if err != nil { if err != nil {
return err return err
} }
// After we return from here, we don't need the console anymore.
defer console.Close()
linuxConsole, ok := console.(*linuxConsole) if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
if !ok { err = pty.Resize(console.WinSize{
return fmt.Errorf("failed to cast console to *linuxConsole") Height: config.ConsoleHeight,
Width: config.ConsoleWidth,
})
if err != nil {
return err
} }
}
// After we return from here, we don't need the console anymore.
defer pty.Close()
// Mount the console inside our rootfs. // Mount the console inside our rootfs.
if mount { if mount {
if err := linuxConsole.mount(); err != nil { if err := mountConsole(slavePath); err != nil {
return err return err
} }
} }
// While we can access console.master, using the API is a good idea. // While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, linuxConsole.File()); err != nil { if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
return err return err
} }
// Now, dup over all the things. // Now, dup over all the things.
return linuxConsole.dupStdio() return dupStdio(slavePath)
} }
// syncParentReady sends to the given pipe a JSON payload which indicates that // syncParentReady sends to the given pipe a JSON payload which indicates that
@ -260,25 +273,27 @@ func setupUser(config *initConfig) error {
} }
} }
// Rather than just erroring out later in setuid(2) and setgid(2), check
// that the user is mapped here.
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
return fmt.Errorf("cannot set uid to unmapped user in user namespace")
}
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
}
if config.Rootless { if config.Rootless {
if execUser.Uid != 0 { // We cannot set any additional groups in a rootless container and thus
return fmt.Errorf("cannot run as a non-root user in a rootless container") // we bail if the user asked us to do so. TODO: We currently can't do
} // this check earlier, but if libcontainer.Process.User was typesafe
// this might work.
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 { if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container") return fmt.Errorf("cannot set any additional groups in a rootless container")
} }
} }
// before we change to the container's user make sure that the processes STDIO // Before we change to the container's user make sure that the processes
// is correctly owned by the user that we are switching to. // STDIO is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(config, execUser); err != nil { if err := fixStdioPermissions(config, execUser); err != nil {
return err return err
} }
@ -297,7 +312,6 @@ func setupUser(config *initConfig) error {
if err := system.Setgid(execUser.Gid); err != nil { if err := system.Setgid(execUser.Gid); err != nil {
return err return err
} }
if err := system.Setuid(execUser.Uid); err != nil { if err := system.Setuid(execUser.Uid); err != nil {
return err return err
} }
@ -334,14 +348,6 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
continue continue
} }
// Skip chown if s.Gid is actually an unmapped gid in the host. While
// this is a bit dodgy if it just so happens that the console _is_
// owned by overflow_gid, there's no way for us to disambiguate this as
// a userspace program.
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
continue
}
// We only change the uid owner (as it is possible for the mount to // We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it). // prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is // The reason why we don't just leave the default uid=X mount setup is
@ -349,6 +355,15 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
// this code, you couldn't effectively run as a non-root user inside a // this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up. // container and also have a console set up.
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
// If we've hit an EINVAL then s.Gid isn't mapped in the user
// namespace. If we've hit an EPERM then the inode's current owner
// is not mapped in our user namespace (in particular,
// privileged_wrt_inode_uidgid() has failed). In either case, we
// are in a configuration where it's better for us to just not
// touch the stdio rather than bail at this point.
if err == unix.EINVAL || err == unix.EPERM {
continue
}
return err return err
} }
} }
@ -479,6 +494,16 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
logrus.Warn(err) logrus.Warn(err)
} }
subreaper, err := system.GetSubreaper()
if err != nil {
// The error here means that PR_GET_CHILD_SUBREAPER is not
// supported because this code might run on a kernel older
// than 3.4. We don't want to throw an error in that case,
// and we simplify things, considering there is no subreaper
// set.
subreaper = 0
}
for _, p := range procs { for _, p := range procs {
if s != unix.SIGKILL { if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil { if ok, err := isWaitable(p.Pid); err != nil {
@ -492,11 +517,18 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
} }
} }
// In case a subreaper has been setup, this code must not
// wait for the process. Otherwise, we cannot be sure the
// current process will be reaped by the subreaper, while
// the subreaper might be waiting for this process in order
// to retrieve its exit code.
if subreaper == 0 {
if _, err := p.Wait(); err != nil { if _, err := p.Wait(); err != nil {
if !isNoChildren(err) { if !isNoChildren(err) {
logrus.Warn("wait: ", err) logrus.Warn("wait: ", err)
} }
} }
} }
}
return nil return nil
} }

View file

@ -0,0 +1,553 @@
// +build linux
package intelrdt
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/configs"
)
/*
* About Intel RDT/CAT feature:
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
* Cache is the only resource that is supported in RDT.
*
* This feature provides a way for the software to restrict cache allocation to a
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
* The different subsets are identified by class of service (CLOS) and each CLOS
* has a capacity bitmask (CBM).
*
* For more information about Intel RDT/CAT can be found in the section 17.17
* of Intel Software Developer Manual.
*
* About Intel RDT/CAT kernel interface:
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
* "resource control" filesystem, which is a "cgroup-like" interface.
*
* Comparing with cgroups, it has similar process management lifecycle and
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
* filesystem layout.
*
* Intel RDT "resource control" filesystem hierarchy:
* mount -t resctrl resctrl /sys/fs/resctrl
* tree /sys/fs/resctrl
* /sys/fs/resctrl/
* |-- info
* | |-- L3
* | |-- cbm_mask
* | |-- min_cbm_bits
* | |-- num_closids
* |-- cpus
* |-- schemata
* |-- tasks
* |-- <container_id>
* |-- cpus
* |-- schemata
* |-- tasks
*
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
* resource constraints.
*
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
* <container_id>" group). Tasks can be added to a group by writing the task ID
* to the "tasks" file (which will automatically remove them from the previous
* group to which they belonged). New tasks created by fork(2) and clone(2) are
* added to the same group as their parent. If a pid is not in any sub group, it is
* in root group.
*
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
* which contains L3 cache id and capacity bitmask (CBM).
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
*
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
* be set is less than the max bit. The max bits in the CBM is varied among
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
*
* For more information about Intel RDT/CAT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
* An example for runc:
* Consider a two-socket machine with two L3 caches where the default CBM is
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
* the "lower" 50% L3 cache id 1:
*
* "linux": {
* "intelRdt": {
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
* }
* }
*/
type Manager interface {
// Applies Intel RDT configuration to the process with the specified pid
Apply(pid int) error
// Returns statistics for Intel RDT
GetStats() (*Stats, error)
// Destroys the Intel RDT 'container_id' group
Destroy() error
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
GetPath() string
// Set Intel RDT "resource control" filesystem as configured.
Set(container *configs.Config) error
}
// This implements interface Manager
type IntelRdtManager struct {
mu sync.Mutex
Config *configs.Config
Id string
Path string
}
const (
IntelRdtTasks = "tasks"
)
var (
// The absolute root path of the Intel RDT "resource control" filesystem
intelRdtRoot string
intelRdtRootLock sync.Mutex
// The flag to indicate if Intel RDT is supported
isEnabled bool
)
type intelRdtData struct {
root string
config *configs.Config
pid int
}
// Check if Intel RDT is enabled in init()
func init() {
// 1. Check if hardware and kernel support Intel RDT/CAT feature
// "cat_l3" flag is set if supported
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
if !isFlagSet || err != nil {
isEnabled = false
return
}
// 2. Check if Intel RDT "resource control" filesystem is mounted
// The user guarantees to mount the filesystem
isEnabled = isIntelRdtMounted()
}
// Return the mount point path of Intel RDT "resource control" filesysem
func findIntelRdtMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
text := s.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "Intel RDT"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "resctrl" {
// Check that the mount is properly formated.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return fields[4], nil
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("Intel RDT")
}
// Gets the root path of Intel RDT "resource control" filesystem
func getIntelRdtRoot() (string, error) {
intelRdtRootLock.Lock()
defer intelRdtRootLock.Unlock()
if intelRdtRoot != "" {
return intelRdtRoot, nil
}
root, err := findIntelRdtMountpointDir()
if err != nil {
return "", err
}
if _, err := os.Stat(root); err != nil {
return "", err
}
intelRdtRoot = root
return intelRdtRoot, nil
}
func isIntelRdtMounted() bool {
_, err := getIntelRdtRoot()
if err != nil {
return false
}
return true
}
func parseCpuInfoFile(path string) (bool, error) {
f, err := os.Open(path)
if err != nil {
return false, err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return false, err
}
text := s.Text()
flags := strings.Split(text, " ")
// "cat_l3" flag is set if Intel RDT/CAT is supported
for _, flag := range flags {
if flag == "cat_l3" {
return true, nil
}
}
}
return false, nil
}
func parseUint(s string, base, bitSize int) (uint64, error) {
value, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
intValue, intErr := strconv.ParseInt(s, base, bitSize)
// 1. Handle negative values greater than MinInt64 (and)
// 2. Handle negative values lesser than MinInt64
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
return 0, nil
}
return value, err
}
return value, nil
}
// Gets a single uint64 value from the specified file.
func getIntelRdtParamUint(path, file string) (uint64, error) {
fileName := filepath.Join(path, file)
contents, err := ioutil.ReadFile(fileName)
if err != nil {
return 0, err
}
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
}
return res, nil
}
// Gets a string value from the specified file
func getIntelRdtParamString(path, file string) (string, error) {
contents, err := ioutil.ReadFile(filepath.Join(path, file))
if err != nil {
return "", err
}
return strings.TrimSpace(string(contents)), nil
}
func readTasksFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func writeFile(dir, file, data string) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
return &intelRdtData{
root: rootPath,
config: c,
pid: pid,
}, nil
}
// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
rootPath, err := getIntelRdtRoot()
if err != nil {
return l3CacheInfo, err
}
path := filepath.Join(rootPath, "info", "L3")
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
if err != nil {
return l3CacheInfo, err
}
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
if err != nil {
return l3CacheInfo, err
}
numClosids, err := getIntelRdtParamUint(path, "num_closids")
if err != nil {
return l3CacheInfo, err
}
l3CacheInfo.CbmMask = cbmMask
l3CacheInfo.MinCbmBits = minCbmBits
l3CacheInfo.NumClosids = numClosids
return l3CacheInfo, nil
}
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
func WriteIntelRdtTasks(dir string, pid int) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}
// Dont attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
}
}
return nil
}
// Check if Intel RDT is enabled
func IsEnabled() bool {
return isEnabled
}
// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return "", err
}
path := filepath.Join(rootPath, id)
return path, nil
}
// Applies Intel RDT configuration to the process with the specified pid
func (m *IntelRdtManager) Apply(pid int) (err error) {
// If intelRdt is not specified in config, we do nothing
if m.Config.IntelRdt == nil {
return nil
}
d, err := getIntelRdtData(m.Config, pid)
if err != nil && !IsNotFound(err) {
return err
}
m.mu.Lock()
defer m.mu.Unlock()
path, err := d.join(m.Id)
if err != nil {
return err
}
m.Path = path
return nil
}
// Destroys the Intel RDT 'container_id' group
func (m *IntelRdtManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := os.RemoveAll(m.Path); err != nil {
return err
}
m.Path = ""
return nil
}
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
func (m *IntelRdtManager) GetPath() string {
if m.Path == "" {
m.Path, _ = GetIntelRdtPath(m.Id)
}
return m.Path
}
// Returns statistics for Intel RDT
func (m *IntelRdtManager) GetStats() (*Stats, error) {
// If intelRdt is not specified in config
if m.Config.IntelRdt == nil {
return nil, nil
}
m.mu.Lock()
defer m.mu.Unlock()
stats := NewStats()
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
if err != nil {
return nil, err
}
stats.L3CacheInfo = l3CacheInfo
// The read-only L3 cache schema in root
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
stats.L3CacheSchemaRoot = schemaRootStrings[0]
// The L3 cache schema in 'container_id' group
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaStrings := strings.Split(tmpStrings, "\n")
stats.L3CacheSchema = schemaStrings[0]
return stats, nil
}
// Set Intel RDT "resource control" filesystem as configured.
func (m *IntelRdtManager) Set(container *configs.Config) error {
path := m.GetPath()
// About L3 cache schema file:
// The schema has allocation masks/values for L3 cache on each socket,
// which contains L3 cache id and capacity bitmask (CBM).
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
// For example, on a two-socket machine, L3's schema line could be:
// L3:0=ff;1=c0
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
//
// About L3 cache CBM validity:
// The valid L3 cache CBM is a *contiguous bits set* and number of
// bits that can be set is less than the max bit. The max bits in the
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
// "resource control" filesystem layout, the CBM in a group should
// be a subset of the CBM in root. Kernel will check if it is valid
// when writing.
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
// which mapping to entire L3 cache capacity. Some valid CBM values
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
if container.IntelRdt != nil {
l3CacheSchema := container.IntelRdt.L3CacheSchema
if l3CacheSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
}
}
}
return nil
}
func (raw *intelRdtData) join(id string) (string, error) {
path := filepath.Join(raw.root, id)
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
return "", err
}
return path, nil
}
type NotFoundError struct {
ResourceControl string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
}
func NewNotFoundError(res string) error {
return &NotFoundError{
ResourceControl: res,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View file

@ -0,0 +1,24 @@
// +build linux
package intelrdt
type L3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type Stats struct {
// The read-only L3 cache information
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
func NewStats() *Stats {
return &Stats{}
}

View file

@ -29,7 +29,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
return err return err
} }
res := strings.Split(string(dest), ";") res := strings.Split(dest, ";")
if len(res) < 5 { if len(res) < 5 {
return fmt.Errorf("Destination buffer for key description is too small") return fmt.Errorf("Destination buffer for key description is too small")
} }

View file

@ -18,6 +18,8 @@ const (
SetgroupAttr uint16 = 27285 SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286 OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287 RootlessAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
) )
type Int32msg struct { type Int32msg struct {

View file

@ -0,0 +1,23 @@
package mount
// GetMounts retrieves a list of mounts for the current running process.
func GetMounts() ([]*Info, error) {
return parseMountTable()
}
// Mounted looks at /proc/self/mountinfo to determine of the specified
// mountpoint has been mounted
func Mounted(mountpoint string) (bool, error) {
entries, err := parseMountTable()
if err != nil {
return false, err
}
// Search the table for the mountpoint
for _, e := range entries {
if e.Mountpoint == mountpoint {
return true, nil
}
}
return false, nil
}

View file

@ -0,0 +1,82 @@
// +build linux
package mount
import (
"bufio"
"fmt"
"io"
"os"
"strings"
)
const (
/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
(1) mount ID: unique identifier of the mount (may be reused after umount)
(2) parent ID: ID of parent (or of self for the top of the mount tree)
(3) major:minor: value of st_dev for files on filesystem
(4) root: root of the mount within the filesystem
(5) mount point: mount point relative to the process's root
(6) mount options: per mount options
(7) optional fields: zero or more fields of the form "tag[:value]"
(8) separator: marks the end of the optional fields
(9) filesystem type: name of filesystem of the form "type[.subtype]"
(10) mount source: filesystem specific information or "none"
(11) super options: per super block options*/
mountinfoFormat = "%d %d %d:%d %s %s %s %s"
)
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
func parseMountTable() ([]*Info, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
return parseInfoFile(f)
}
func parseInfoFile(r io.Reader) ([]*Info, error) {
var (
s = bufio.NewScanner(r)
out = []*Info{}
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
var (
p = &Info{}
text = s.Text()
optionalFields string
)
if _, err := fmt.Sscanf(text, mountinfoFormat,
&p.ID, &p.Parent, &p.Major, &p.Minor,
&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
}
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
if len(postSeparatorFields) < 3 {
return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
if optionalFields != "-" {
p.Optional = optionalFields
}
p.Fstype = postSeparatorFields[0]
p.Source = postSeparatorFields[1]
p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
out = append(out, p)
}
return out, nil
}

View file

@ -0,0 +1,40 @@
package mount
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
type Info struct {
// ID is a unique identifier of the mount (may be reused after umount).
ID int
// Parent indicates the ID of the mount parent (or of self for the top of the
// mount tree).
Parent int
// Major indicates one half of the device ID which identifies the device class.
Major int
// Minor indicates one half of the device ID which identifies a specific
// instance of device.
Minor int
// Root of the mount within the filesystem.
Root string
// Mountpoint indicates the mount point relative to the process's root.
Mountpoint string
// Opts represents mount-specific options.
Opts string
// Optional represents optional fields.
Optional string
// Fstype indicates the type of filesystem, such as EXT3.
Fstype string
// Source indicates filesystem specific information or "none".
Source string
// VfsOpts represents per super block options.
VfsOpts string
}

View file

@ -44,9 +44,9 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
ch := make(chan struct{}) ch := make(chan struct{})
go func() { go func() {
defer func() { defer func() {
close(ch)
eventfd.Close() eventfd.Close()
evFile.Close() evFile.Close()
close(ch)
}() }()
buf := make([]byte, 8) buf := make([]byte, 8)
for { for {

View file

@ -1,3 +1,4 @@
#define _GNU_SOURCE #define _GNU_SOURCE
#include <endian.h> #include <endian.h>
#include <errno.h> #include <errno.h>
@ -19,6 +20,8 @@
#include <sys/prctl.h> #include <sys/prctl.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h>
#include <linux/limits.h> #include <linux/limits.h>
#include <linux/netlink.h> #include <linux/netlink.h>
@ -64,7 +67,13 @@ struct clone_t {
struct nlconfig_t { struct nlconfig_t {
char *data; char *data;
/* Process settings. */
uint32_t cloneflags; uint32_t cloneflags;
char *oom_score_adj;
size_t oom_score_adj_len;
/* User namespace settings.*/
char *uidmap; char *uidmap;
size_t uidmap_len; size_t uidmap_len;
char *gidmap; char *gidmap;
@ -72,9 +81,13 @@ struct nlconfig_t {
char *namespaces; char *namespaces;
size_t namespaces_len; size_t namespaces_len;
uint8_t is_setgroup; uint8_t is_setgroup;
/* Rootless container settings.*/
uint8_t is_rootless; uint8_t is_rootless;
char *oom_score_adj; char *uidmappath;
size_t oom_score_adj_len; size_t uidmappath_len;
char *gidmappath;
size_t gidmappath_len;
}; };
/* /*
@ -89,6 +102,8 @@ struct nlconfig_t {
#define SETGROUP_ATTR 27285 #define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286 #define OOM_SCORE_ADJ_ATTR 27286
#define ROOTLESS_ATTR 27287 #define ROOTLESS_ATTR 27287
#define UIDMAPPATH_ATTR 27288
#define GIDMAPPATH_ATTR 27289
/* /*
* Use the raw syscall for versions of glibc which don't include a function for * Use the raw syscall for versions of glibc which don't include a function for
@ -191,22 +206,96 @@ static void update_setgroups(int pid, enum policy_t setgroup)
} }
} }
static void update_uidmap(int pid, char *map, size_t map_len) static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) int child;
return;
if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) /*
bail("failed to update /proc/%d/uid_map", pid); * If @app is NULL, execve will segfault. Just check it here and bail (if
* we're in this path, the caller is already getting desparate and there
* isn't a backup to this failing). This usually would be a configuration
* or programming issue.
*/
if (!app)
bail("mapping tool not present");
child = fork();
if (child < 0)
bail("failed to fork");
if (!child) {
#define MAX_ARGV 20
char *argv[MAX_ARGV];
char *envp[] = {NULL};
char pid_fmt[16];
int argc = 0;
char *next;
snprintf(pid_fmt, 16, "%d", pid);
argv[argc++] = (char *) app;
argv[argc++] = pid_fmt;
/*
* Convert the map string into a list of argument that
* newuidmap/newgidmap can understand.
*/
while (argc < MAX_ARGV) {
if (*map == '\0') {
argv[argc++] = NULL;
break;
}
argv[argc++] = map;
next = strpbrk(map, "\n ");
if (next == NULL)
break;
*next++ = '\0';
map = next + strspn(next, "\n ");
}
execve(app, argv, envp);
bail("failed to execv");
} else {
int status;
while (true) {
if (waitpid(child, &status, 0) < 0) {
if (errno == EINTR)
continue;
bail("failed to waitpid");
}
if (WIFEXITED(status) || WIFSIGNALED(status))
return WEXITSTATUS(status);
}
}
return -1;
} }
static void update_gidmap(int pid, char *map, size_t map_len) static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) if (map == NULL || map_len <= 0)
return; return;
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
if (errno != EPERM)
bail("failed to update /proc/%d/uid_map", pid);
if (try_mapping_tool(path, pid, map, map_len))
bail("failed to use newuid map on %d", pid);
}
}
static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
{
if (map == NULL || map_len <= 0)
return;
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
if (errno != EPERM)
bail("failed to update /proc/%d/gid_map", pid); bail("failed to update /proc/%d/gid_map", pid);
if (try_mapping_tool(path, pid, map, map_len))
bail("failed to use newgid map on %d", pid);
}
} }
static void update_oom_score_adj(char *data, size_t len) static void update_oom_score_adj(char *data, size_t len)
@ -350,6 +439,14 @@ static void nl_parse(int fd, struct nlconfig_t *config)
config->gidmap = current; config->gidmap = current;
config->gidmap_len = payload_len; config->gidmap_len = payload_len;
break; break;
case UIDMAPPATH_ATTR:
config->uidmappath = current;
config->uidmappath_len = payload_len;
break;
case GIDMAPPATH_ATTR:
config->gidmappath = current;
config->gidmappath_len = payload_len;
break;
case SETGROUP_ATTR: case SETGROUP_ATTR:
config->is_setgroup = readint8(current); config->is_setgroup = readint8(current);
break; break;
@ -542,7 +639,7 @@ void nsexec(void)
*/ */
case JUMP_PARENT: { case JUMP_PARENT: {
int len; int len;
pid_t child; pid_t child, first_child = -1;
char buf[JSON_MAX]; char buf[JSON_MAX];
bool ready = false; bool ready = false;
@ -596,8 +693,8 @@ void nsexec(void)
update_setgroups(child, SETGROUPS_DENY); update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */ /* Set up mappings. */
update_uidmap(child, config.uidmap, config.uidmap_len); update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
update_gidmap(child, config.gidmap, config.gidmap_len); update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
s = SYNC_USERMAP_ACK; s = SYNC_USERMAP_ACK;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
@ -606,18 +703,18 @@ void nsexec(void)
} }
break; break;
case SYNC_RECVPID_PLS: { case SYNC_RECVPID_PLS: {
pid_t old = child; first_child = child;
/* Get the init_func pid. */ /* Get the init_func pid. */
if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
kill(old, SIGKILL); kill(first_child, SIGKILL);
bail("failed to sync with child: read(childpid)"); bail("failed to sync with child: read(childpid)");
} }
/* Send ACK. */ /* Send ACK. */
s = SYNC_RECVPID_ACK; s = SYNC_RECVPID_ACK;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(old, SIGKILL); kill(first_child, SIGKILL);
kill(child, SIGKILL); kill(child, SIGKILL);
bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
} }
@ -665,8 +762,13 @@ void nsexec(void)
} }
} }
/* Send the init_func pid back to our parent. */ /*
len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child); * Send the init_func pid and the pid of the first child back to our parent.
*
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
* It becomes the responsibility of our parent to reap the first child.
*/
len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
if (len < 0) { if (len < 0) {
kill(child, SIGKILL); kill(child, SIGKILL);
bail("unable to generate JSON for child pid"); bail("unable to generate JSON for child pid");

View file

@ -47,6 +47,10 @@ type Process struct {
// ExtraFiles specifies additional open files to be inherited by the container // ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File ExtraFiles []*os.File
// Initial sizings for the console
ConsoleWidth uint16
ConsoleHeight uint16
// Capabilities specify the capabilities to keep when executing the process inside the container // Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask // All capabilities not specified will be dropped from the processes capability mask
Capabilities *configs.Capabilities Capabilities *configs.Capabilities

View file

@ -15,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
@ -49,6 +50,7 @@ type setnsProcess struct {
parentPipe *os.File parentPipe *os.File
childPipe *os.File childPipe *os.File
cgroupPaths map[string]string cgroupPaths map[string]string
intelRdtPath string
config *initConfig config *initConfig
fds []string fds []string
process *Process process *Process
@ -83,12 +85,20 @@ func (p *setnsProcess) start() (err error) {
if err = p.execSetns(); err != nil { if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process") return newSystemErrorWithCause(err, "executing setns process")
} }
// We can't join cgroups if we're in a rootless container. if len(p.cgroupPaths) > 0 {
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
} }
} }
if p.intelRdtPath != "" {
// if Intel RDT "resource control" filesystem path exists
_, err := os.Stat(p.intelRdtPath)
if err == nil {
if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
}
}
}
// set rlimits, this has to be done here because we lose permissions // set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace // to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@ -141,6 +151,16 @@ func (p *setnsProcess) execSetns() error {
p.cmd.Wait() p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe") return newSystemErrorWithCause(err, "reading pid from init pipe")
} }
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid) process, err := os.FindProcess(pid.Pid)
if err != nil { if err != nil {
return err return err
@ -188,12 +208,12 @@ type initProcess struct {
childPipe *os.File childPipe *os.File
config *initConfig config *initConfig
manager cgroups.Manager manager cgroups.Manager
intelRdtManager intelrdt.Manager
container *linuxContainer container *linuxContainer
fds []string fds []string
process *Process process *Process
bootstrapData io.Reader bootstrapData io.Reader
sharePidns bool sharePidns bool
rootDir *os.File
} }
func (p *initProcess) pid() int { func (p *initProcess) pid() int {
@ -224,6 +244,16 @@ func (p *initProcess) execSetns() error {
p.cmd.Wait() p.cmd.Wait()
return err return err
} }
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid) process, err := os.FindProcess(pid.Pid)
if err != nil { if err != nil {
return err return err
@ -238,17 +268,39 @@ func (p *initProcess) start() error {
err := p.cmd.Start() err := p.cmd.Start()
p.process.ops = p p.process.ops = p
p.childPipe.Close() p.childPipe.Close()
p.rootDir.Close()
if err != nil { if err != nil {
p.process.ops = nil p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command") return newSystemErrorWithCause(err, "starting init process command")
} }
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
}
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
if p.intelRdtManager != nil {
p.intelRdtManager.Destroy()
}
}
}()
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe") return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
} }
if err := p.execSetns(); err != nil { if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init") return newSystemErrorWithCause(err, "running exec setns process for init")
} }
// Save the standard descriptor names before the container process // Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now, // can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up. // we won't know at checkpoint time which file descriptor to look up.
@ -257,18 +309,6 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
} }
p.setExternalDescriptors(fds) p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if err := p.createNetworkInterfaces(); err != nil { if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces") return newSystemErrorWithCause(err, "creating network interfaces")
} }
@ -294,13 +334,20 @@ func (p *initProcess) start() error {
if err := p.manager.Set(p.config.Config); err != nil { if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process") return newSystemErrorWithCause(err, "setting cgroup config for ready process")
} }
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
}
}
if p.config.Config.Hooks != nil { if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{ s := configs.HookState{
Version: p.container.config.Version, Version: p.container.config.Version,
ID: p.container.id, ID: p.container.id,
Pid: p.pid(), Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"), Bundle: bundle,
Annotations: annotations,
} }
for i, hook := range p.config.Config.Hooks.Prestart { for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil { if err := hook.Run(s); err != nil {
@ -319,12 +366,19 @@ func (p *initProcess) start() error {
if err := p.manager.Set(p.config.Config); err != nil { if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process") return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
} }
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
}
}
if p.config.Config.Hooks != nil { if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{ s := configs.HookState{
Version: p.container.config.Version, Version: p.container.config.Version,
ID: p.container.id, ID: p.container.id,
Pid: p.pid(), Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"), Bundle: bundle,
Annotations: annotations,
} }
for i, hook := range p.config.Config.Hooks.Prestart { for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil { if err := hook.Run(s); err != nil {

View file

@ -13,11 +13,11 @@ import (
"strings" "strings"
"time" "time"
"github.com/docker/docker/pkg/mount" "github.com/cyphar/filepath-securejoin"
"github.com/docker/docker/pkg/symlink"
"github.com/mrunalp/fileutils" "github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/mount"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/selinux/go-selinux/label" "github.com/opencontainers/selinux/go-selinux/label"
@ -40,7 +40,8 @@ func needsSetupDev(config *configs.Config) bool {
// prepareRootfs sets up the devices, mount points, and filesystems for use // prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro. You must call // inside a new mount namespace. It doesn't set anything as ro. You must call
// finalizeRootfs after this function to finish setting up the rootfs. // finalizeRootfs after this function to finish setting up the rootfs.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
config := iConfig.Config
if err := prepareRoot(config); err != nil { if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs") return newSystemErrorWithCause(err, "preparing rootfs")
} }
@ -80,6 +81,7 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
// The hooks are run after the mounts are setup, but before we switch to the new // The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount // root, so that the old root is still available in the hooks for any mount
// manipulations. // manipulations.
// Note that iConfig.Cwd is not guaranteed to exist here.
if err := syncParentHooks(pipe); err != nil { if err := syncParentHooks(pipe); err != nil {
return err return err
} }
@ -111,6 +113,14 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
} }
} }
if cwd := iConfig.Cwd; cwd != "" {
// Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...".
// However, we are safe to call MkDirAll directly because we are in the jail here.
if err := os.MkdirAll(cwd, 0755); err != nil {
return err
}
}
return nil return nil
} }
@ -230,7 +240,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
// any previous mounts can invalidate the next mount's destination. // any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other // this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs. // evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
return err return err
} }
if err := checkMountDestination(rootfs, dest); err != nil { if err := checkMountDestination(rootfs, dest); err != nil {
@ -318,7 +328,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
// this can happen when a user specifies mounts within other mounts to cause breakouts or other // this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs. // evil stuff to try to escape the container's rootfs.
var err error var err error
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
return err return err
} }
if err := checkMountDestination(rootfs, dest); err != nil { if err := checkMountDestination(rootfs, dest); err != nil {
@ -668,9 +678,12 @@ func pivotRoot(rootfs string) error {
return err return err
} }
// Make oldroot rprivate to make sure our unmounts don't propagate to the // Make oldroot rslave to make sure our unmounts don't propagate to the
// host (and thus bork the machine). // host (and thus bork the machine). We don't use rprivate because this is
if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil { // known to cause issues due to races where we still have a reference to a
// mount while a process in the host namespace are trying to operate on
// something they think has no mounts (devicemapper in particular).
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
return err return err
} }
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
@ -733,7 +746,14 @@ func remountReadonly(m *configs.Mount) error {
flags = m.Flags flags = m.Flags
) )
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil { // There is a special case in the kernel for
// MS_REMOUNT | MS_BIND, which allows us to change only the
// flags even as an unprivileged user (i.e. user namespace)
// assuming we don't drop any security related flags (nodev,
// nosuid, etc.). So, let's use that case so that we can do
// this re-mount without failing in a userns.
flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
switch err { switch err {
case unix.EBUSY: case unix.EBUSY:
time.Sleep(100 * time.Millisecond) time.Sleep(100 * time.Millisecond)

View file

@ -22,6 +22,11 @@ var (
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
) )
const (
// Linux system calls can have at most 6 arguments
syscallMaxArguments int = 6
)
// Filters given syscalls in a container, preventing them from being used // Filters given syscalls in a container, preventing them from being used
// Started in the container init process, and carried over to all child processes // Started in the container init process, and carried over to all child processes
// Setns calls, however, require a separate invocation, as they are not children // Setns calls, however, require a separate invocation, as they are not children
@ -45,11 +50,11 @@ func InitSeccomp(config *configs.Seccomp) error {
for _, arch := range config.Architectures { for _, arch := range config.Architectures {
scmpArch, err := libseccomp.GetArchFromString(arch) scmpArch, err := libseccomp.GetArchFromString(arch)
if err != nil { if err != nil {
return err return fmt.Errorf("error validating Seccomp architecture: %s", err)
} }
if err := filter.AddArch(scmpArch); err != nil { if err := filter.AddArch(scmpArch); err != nil {
return err return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
} }
} }
@ -170,29 +175,55 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
// Convert the call's action to the libseccomp equivalent // Convert the call's action to the libseccomp equivalent
callAct, err := getAction(call.Action) callAct, err := getAction(call.Action)
if err != nil { if err != nil {
return err return fmt.Errorf("action in seccomp profile is invalid: %s", err)
} }
// Unconditional match - just add the rule // Unconditional match - just add the rule
if len(call.Args) == 0 { if len(call.Args) == 0 {
if err = filter.AddRule(callNum, callAct); err != nil { if err = filter.AddRule(callNum, callAct); err != nil {
return err return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
} }
} else { } else {
// Conditional match - convert the per-arg rules into library format // If two or more arguments have the same condition,
// Revert to old behavior, adding each condition as a separate rule
argCounts := make([]uint, syscallMaxArguments)
conditions := []libseccomp.ScmpCondition{} conditions := []libseccomp.ScmpCondition{}
for _, cond := range call.Args { for _, cond := range call.Args {
newCond, err := getCondition(cond) newCond, err := getCondition(cond)
if err != nil { if err != nil {
return err return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
} }
argCounts[cond.Index] += 1
conditions = append(conditions, newCond) conditions = append(conditions, newCond)
} }
hasMultipleArgs := false
for _, count := range argCounts {
if count > 1 {
hasMultipleArgs = true
break
}
}
if hasMultipleArgs {
// Revert to old behavior
// Add each condition attached to a separate rule
for _, cond := range conditions {
condArr := []libseccomp.ScmpCondition{cond}
if err = filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
}
}
} else {
// No conditions share same argument
// Use new, proper behavior
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
return err return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
}
} }
} }

View file

@ -1,11 +0,0 @@
// +build linux,go1.5
package libcontainer
import "syscall"
// Set the GidMappingsEnableSetgroups member to true, so the process's
// setgroups proc entry wont be set to 'deny' if GidMappings are set
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = true
}

View file

@ -47,7 +47,10 @@ func (l *linuxSetnsInit) Init() error {
return err return err
} }
} }
if l.config.Config.Seccomp != nil { // Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err return err
} }
@ -61,5 +64,13 @@ func (l *linuxSetnsInit) Init() error {
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err return err
} }
// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
} }

View file

@ -22,7 +22,7 @@ type linuxStandardInit struct {
pipe *os.File pipe *os.File
consoleSocket *os.File consoleSocket *os.File
parentPid int parentPid int
stateDirFD int fifoFd int
config *initConfig config *initConfig
} }
@ -30,15 +30,15 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
var newperms uint32 var newperms uint32
if l.config.Config.Namespaces.Contains(configs.NEWUSER) { if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
// with user ns we need 'other' search permissions // With user ns we need 'other' search permissions.
newperms = 0x8 newperms = 0x8
} else { } else {
// without user ns we need 'UID' search permissions // Without user ns we need 'UID' search permissions.
newperms = 0x80000 newperms = 0x80000
} }
// create a unique per session container name that we can // Create a unique per session container name that we can join in setns;
// join in setns; however, other containers can also join it // However, other containers can also join it.
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
} }
@ -46,12 +46,12 @@ func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring { if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams() ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring // Do not inherit the parent's session keyring.
sessKeyId, err := keys.JoinSessionKeyring(ringname) sessKeyId, err := keys.JoinSessionKeyring(ringname)
if err != nil { if err != nil {
return err return err
} }
// make session keyring searcheable // Make session keyring searcheable.
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err return err
} }
@ -68,7 +68,7 @@ func (l *linuxStandardInit) Init() error {
// prepareRootfs() can be executed only for a new mount namespace. // prepareRootfs() can be executed only for a new mount namespace.
if l.config.Config.Namespaces.Contains(configs.NEWNS) { if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := prepareRootfs(l.pipe, l.config.Config); err != nil { if err := prepareRootfs(l.pipe, l.config); err != nil {
return err return err
} }
} }
@ -150,37 +150,47 @@ func (l *linuxStandardInit) Init() error {
if err := pdeath.Restore(); err != nil { if err := pdeath.Restore(); err != nil {
return err return err
} }
// compare the parent from the initial start of the init process and make sure that it did not change. // Compare the parent from the initial start of the init process and make
// if the parent changes that means it died and we were reparented to something else so we should // sure that it did not change. if the parent changes that means it died
// just kill ourself and not cause problems for someone else. // and we were reparented to something else so we should just kill ourself
// and not cause problems for someone else.
if unix.Getppid() != l.parentPid { if unix.Getppid() != l.parentPid {
return unix.Kill(unix.Getpid(), unix.SIGKILL) return unix.Kill(unix.Getpid(), unix.SIGKILL)
} }
// check for the arg before waiting to make sure it exists and it is returned // Check for the arg before waiting to make sure it exists and it is
// as a create time error. // returned as a create time error.
name, err := exec.LookPath(l.config.Args[0]) name, err := exec.LookPath(l.config.Args[0])
if err != nil { if err != nil {
return err return err
} }
// close the pipe to signal that we have completed our init. // Close the pipe to signal that we have completed our init.
l.pipe.Close() l.pipe.Close()
// wait for the fifo to be opened on the other side before // Wait for the FIFO to be opened on the other side before exec-ing the
// exec'ing the users process. // user process. We open it through /proc/self/fd/$fd, because the fd that
fd, err := unix.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|unix.O_CLOEXEC, 0) // was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil { if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo") return newSystemErrorWithCause(err, "open exec fifo")
} }
if _, err := unix.Write(fd, []byte("0")); err != nil { if _, err := unix.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo") return newSystemErrorWithCause(err, "write 0 exec fifo")
} }
// Close the O_PATH fifofd fd before exec because the kernel resets
// dumpable in the wrong order. This has been fixed in newer kernels, but
// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
// N.B. the core issue itself (passing dirfds to the host filesystem) has
// since been resolved.
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.fifoFd)
// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp") return newSystemErrorWithCause(err, "init seccomp")
} }
} }
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.stateDirFD)
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process") return newSystemErrorWithCause(err, "exec user process")
} }

View file

@ -45,6 +45,11 @@ func destroy(c *linuxContainer) error {
} }
} }
err := c.cgroupManager.Destroy() err := c.cgroupManager.Destroy()
if c.intelRdtManager != nil {
if ierr := c.intelRdtManager.Destroy(); err == nil {
err = ierr
}
}
if rerr := os.RemoveAll(c.root); err == nil { if rerr := os.RemoveAll(c.root); err == nil {
err = rerr err = rerr
} }
@ -58,10 +63,12 @@ func destroy(c *linuxContainer) error {
func runPoststopHooks(c *linuxContainer) error { func runPoststopHooks(c *linuxContainer) error {
if c.config.Hooks != nil { if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{ s := configs.HookState{
Version: c.config.Version, Version: c.config.Version,
ID: c.id, ID: c.id,
Bundle: utils.SearchLabels(c.config.Labels, "bundle"), Bundle: bundle,
Annotations: annotations,
} }
for _, hook := range c.config.Hooks.Poststop { for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil { if err := hook.Run(s); err != nil {

View file

@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -1,8 +1,10 @@
package libcontainer package libcontainer
import "github.com/opencontainers/runc/libcontainer/cgroups" import "github.com/opencontainers/runc/libcontainer/cgroups"
import "github.com/opencontainers/runc/libcontainer/intelrdt"
type Stats struct { type Stats struct {
Interfaces []*NetworkInterface Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats CgroupStats *cgroups.Stats
IntelRdtStats *intelrdt.Stats
} }

View file

@ -1,7 +0,0 @@
package libcontainer
// Solaris - TODO
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -134,3 +134,14 @@ func RunningInUserNS() bool {
func SetSubreaper(i int) error { func SetSubreaper(i int) error {
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
} }
// GetSubreaper returns the subreaper setting for the calling process
func GetSubreaper() (int, error) {
var i uintptr
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
return -1, err
}
return int(i), nil
}

View file

@ -1,4 +1,5 @@
// +build linux,arm // +build linux
// +build 386 arm
package system package system

View file

@ -1,25 +0,0 @@
// +build linux,386
package system
import (
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View file

@ -1,4 +1,5 @@
// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x // +build linux
// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le s390x
package system package system

View file

@ -1,4 +1,4 @@
// +build cgo,linux cgo,freebsd // +build cgo,linux
package system package system

View file

@ -1,38 +0,0 @@
// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris
package user
import (
"io"
"syscall"
)
func GetPasswdPath() (string, error) {
return "", ErrUnsupported
}
func GetPasswd() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
func GetGroupPath() (string, error) {
return "", ErrUnsupported
}
func GetGroup() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(syscall.Getuid())
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(syscall.Getgid())
}

View file

@ -84,12 +84,10 @@ func RecvFd(socket *os.File) (*os.File, error) {
// addition, the file.Name() of the given file will also be sent as // addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual // non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor). // information for a file descriptor).
func SendFd(socket, file *os.File) error { func SendFd(socket *os.File, name string, fd uintptr) error {
name := []byte(file.Name())
if len(name) >= MaxNameLen { if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", file.Name()) return fmt.Errorf("sendfd: filename too long: %s", name)
} }
oob := unix.UnixRights(int(file.Fd())) oob := unix.UnixRights(int(fd))
return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0)
return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
} }

View file

@ -5,7 +5,7 @@ github.com/opencontainers/runtime-spec v1.0.0
# Core libcontainer functionality. # Core libcontainer functionality.
github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
github.com/opencontainers/selinux v1.0.0-rc1 github.com/opencontainers/selinux v1.0.0-rc1
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 github.com/seccomp/libseccomp-golang 84e90a91acea0f4e51e62bc1a75de18b1fc0790f
github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac
github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270 github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270
@ -15,7 +15,11 @@ github.com/coreos/pkg v3
github.com/godbus/dbus v3 github.com/godbus/dbus v3
github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8 github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8
# Command-line interface. # Command-line interface.
github.com/docker/docker 0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d github.com/cyphar/filepath-securejoin v0.2.1
github.com/docker/go-units v0.2.0 github.com/docker/go-units v0.2.0
github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e
golang.org/x/sys 0e0164865330d5cf1c00247be08330bf96e2f87c https://github.com/golang/sys golang.org/x/sys 7ddbeae9ae08c6a06a59597f0c9edbc5ff2444ce https://github.com/golang/sys
# console dependencies
github.com/containerd/console 84eeaae905fa414d03e07bcd6c8d3f19e7cf180e
github.com/pkg/errors v0.8.0

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {

View file

@ -1446,6 +1446,22 @@ func Mlock(b []byte) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Mlockall(flags int) (err error) { func Mlockall(flags int) (err error) {
_, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0) _, _, e1 := Syscall(SYS_MLOCKALL, uintptr(flags), 0, 0)
if e1 != 0 { if e1 != 0 {
@ -1472,22 +1488,6 @@ func Msync(b []byte, flags int) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlock(b []byte) (err error) {
var _p0 unsafe.Pointer
if len(b) > 0 {
_p0 = unsafe.Pointer(&b[0])
} else {
_p0 = unsafe.Pointer(&_zero)
}
_, _, e1 := Syscall(SYS_MUNLOCK, uintptr(_p0), uintptr(len(b)), 0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func Munlockall() (err error) { func Munlockall() (err error) {
_, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0) _, _, e1 := Syscall(SYS_MUNLOCKALL, 0, 0, 0)
if e1 != 0 { if e1 != 0 {