Add shim for reattach of processes
Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Remove runtime files from containerd Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Update supervisor for orphaned containers Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Remove ctr/container.go back to rpc calls Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add attach to loaded container Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add monitor based on epoll for process exits Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Convert pids in containerd to string This is so that we no longer care about linux or system level pids and processes in containerd have user defined process id(pid) kinda like the exec process ids that docker has today. Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add reaper back to containerd Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Implement list containers with new process model Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Implement restore of processes Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add NONBLOCK to exit fifo open Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Implement tty reattach Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Fix race in exit pipe creation Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add delete to shim Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Update shim to use pid-file and not stdout Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
8d1f71c3d7
commit
fe38efda50
33 changed files with 1210 additions and 1836 deletions
955
linux/linux.go
955
linux/linux.go
|
@ -1,955 +0,0 @@
|
|||
// +build libcontainer
|
||||
|
||||
package linux
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/containerd/runtime"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/specs"
|
||||
)
|
||||
|
||||
const (
|
||||
RLIMIT_CPU = iota // CPU time in sec
|
||||
RLIMIT_FSIZE // Maximum filesize
|
||||
RLIMIT_DATA // max data size
|
||||
RLIMIT_STACK // max stack size
|
||||
RLIMIT_CORE // max core file size
|
||||
RLIMIT_RSS // max resident set size
|
||||
RLIMIT_NPROC // max number of processes
|
||||
RLIMIT_NOFILE // max number of open files
|
||||
RLIMIT_MEMLOCK // max locked-in-memory address space
|
||||
RLIMIT_AS // address space limit
|
||||
RLIMIT_LOCKS // maximum file locks held
|
||||
RLIMIT_SIGPENDING // max number of pending signals
|
||||
RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues
|
||||
RLIMIT_NICE // max nice prio allowed to raise to
|
||||
RLIMIT_RTPRIO // maximum realtime priority
|
||||
RLIMIT_RTTIME // timeout for RT tasks in us
|
||||
)
|
||||
|
||||
var rlimitMap = map[string]int{
|
||||
"RLIMIT_CPU": RLIMIT_CPU,
|
||||
"RLIMIT_FSIZE": RLIMIT_FSIZE,
|
||||
"RLIMIT_DATA": RLIMIT_DATA,
|
||||
"RLIMIT_STACK": RLIMIT_STACK,
|
||||
"RLIMIT_CORE": RLIMIT_CORE,
|
||||
"RLIMIT_RSS": RLIMIT_RSS,
|
||||
"RLIMIT_NPROC": RLIMIT_NPROC,
|
||||
"RLIMIT_NOFILE": RLIMIT_NOFILE,
|
||||
"RLIMIT_MEMLOCK": RLIMIT_MEMLOCK,
|
||||
"RLIMIT_AS": RLIMIT_AS,
|
||||
"RLIMIT_LOCKS": RLIMIT_LOCKS,
|
||||
"RLIMIT_SGPENDING": RLIMIT_SIGPENDING,
|
||||
"RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE,
|
||||
"RLIMIT_NICE": RLIMIT_NICE,
|
||||
"RLIMIT_RTPRIO": RLIMIT_RTPRIO,
|
||||
"RLIMIT_RTTIME": RLIMIT_RTTIME,
|
||||
}
|
||||
|
||||
func strToRlimit(key string) (int, error) {
|
||||
rl, ok := rlimitMap[key]
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("Wrong rlimit value: %s", key)
|
||||
}
|
||||
return rl, nil
|
||||
}
|
||||
|
||||
const wildcard = -1
|
||||
|
||||
var allowedDevices = []*configs.Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "/dev/tty0",
|
||||
Type: 'c',
|
||||
Major: 4,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "/dev/tty1",
|
||||
Type: 'c',
|
||||
Major: 4,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: wildcard,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}
|
||||
|
||||
var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{
|
||||
specs.PIDNamespace: configs.NEWPID,
|
||||
specs.NetworkNamespace: configs.NEWNET,
|
||||
specs.MountNamespace: configs.NEWNS,
|
||||
specs.UserNamespace: configs.NEWUSER,
|
||||
specs.IPCNamespace: configs.NEWIPC,
|
||||
specs.UTSNamespace: configs.NEWUTS,
|
||||
}
|
||||
|
||||
var mountPropagationMapping = map[string]int{
|
||||
"rprivate": syscall.MS_PRIVATE | syscall.MS_REC,
|
||||
"private": syscall.MS_PRIVATE,
|
||||
"rslave": syscall.MS_SLAVE | syscall.MS_REC,
|
||||
"slave": syscall.MS_SLAVE,
|
||||
"rshared": syscall.MS_SHARED | syscall.MS_REC,
|
||||
"shared": syscall.MS_SHARED,
|
||||
"": syscall.MS_PRIVATE | syscall.MS_REC,
|
||||
}
|
||||
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
goruntime.GOMAXPROCS(1)
|
||||
goruntime.LockOSThread()
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
fmt.Fprint(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
panic("--this line should have never been executed, congratulations--")
|
||||
}
|
||||
}
|
||||
|
||||
type libcontainerProcess struct {
|
||||
process *libcontainer.Process
|
||||
spec specs.Process
|
||||
}
|
||||
|
||||
// change interface to support an error
|
||||
func (p *libcontainerProcess) Pid() (int, error) {
|
||||
pid, err := p.process.Pid()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return pid, nil
|
||||
}
|
||||
|
||||
func (p *libcontainerProcess) Spec() specs.Process {
|
||||
return p.spec
|
||||
}
|
||||
|
||||
func (p *libcontainerProcess) Signal(s os.Signal) error {
|
||||
return p.process.Signal(s)
|
||||
}
|
||||
|
||||
func (p *libcontainerProcess) Close() error {
|
||||
// in close we always need to call wait to close/flush any pipes
|
||||
p.process.Wait()
|
||||
// explicitly close any open fd on the process
|
||||
for _, cl := range []interface{}{
|
||||
p.process.Stderr,
|
||||
p.process.Stdout,
|
||||
p.process.Stdin,
|
||||
} {
|
||||
if cl != nil {
|
||||
if c, ok := cl.(io.Closer); ok {
|
||||
c.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type libcontainerContainer struct {
|
||||
c libcontainer.Container
|
||||
initProcess *libcontainerProcess
|
||||
additionalProcesses map[int]*libcontainerProcess
|
||||
exitStatus int
|
||||
exited bool
|
||||
path string
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Checkpoints() ([]runtime.Checkpoint, error) {
|
||||
out := []runtime.Checkpoint{}
|
||||
files, err := ioutil.ReadDir(c.getCheckpointPath(""))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return out, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
for _, fi := range files {
|
||||
out = append(out, runtime.Checkpoint{
|
||||
Name: fi.Name(),
|
||||
Timestamp: fi.ModTime(),
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) DeleteCheckpoint(name string) error {
|
||||
path := c.getCheckpointPath(name)
|
||||
if err := os.RemoveAll(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return runtime.ErrCheckpointNotExists
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) getCheckpointPath(name string) string {
|
||||
return filepath.Join(c.path, "checkpoints", name)
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Checkpoint(cp runtime.Checkpoint) error {
|
||||
opts := c.createCheckpointOpts(cp)
|
||||
if err := os.MkdirAll(filepath.Dir(opts.ImagesDirectory), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// mkdir is atomic so if it already exists we can fail
|
||||
if err := os.Mkdir(opts.ImagesDirectory, 0755); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return runtime.ErrCheckpointExists
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := c.c.Checkpoint(opts); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) createCheckpointOpts(cp runtime.Checkpoint) *libcontainer.CriuOpts {
|
||||
opts := libcontainer.CriuOpts{}
|
||||
opts.LeaveRunning = !cp.Exit
|
||||
opts.ShellJob = cp.Shell
|
||||
opts.TcpEstablished = cp.Tcp
|
||||
opts.ExternalUnixConnections = cp.UnixSockets
|
||||
opts.ImagesDirectory = c.getCheckpointPath(cp.Name)
|
||||
return &opts
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Restore(name string) error {
|
||||
path := c.getCheckpointPath(name)
|
||||
var opts libcontainer.CriuOpts
|
||||
opts.ImagesDirectory = path
|
||||
return c.c.Restore(c.initProcess.process, &opts)
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Resume() error {
|
||||
return c.c.Resume()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Pause() error {
|
||||
return c.c.Pause()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) State() runtime.State {
|
||||
// TODO: what to do with error
|
||||
state, err := c.c.Status()
|
||||
if err != nil {
|
||||
return runtime.State("")
|
||||
}
|
||||
switch state {
|
||||
case libcontainer.Paused, libcontainer.Pausing:
|
||||
return runtime.Paused
|
||||
}
|
||||
return runtime.State("")
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) ID() string {
|
||||
return c.c.ID()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Path() string {
|
||||
return c.path
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Pid() (int, error) {
|
||||
return c.initProcess.Pid()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Start() error {
|
||||
return c.c.Start(c.initProcess.process)
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) SetExited(status int) {
|
||||
c.exitStatus = status
|
||||
// meh
|
||||
c.exited = true
|
||||
c.initProcess.Close()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Stats() (*runtime.Stat, error) {
|
||||
now := time.Now()
|
||||
stats, err := c.c.Stats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &runtime.Stat{
|
||||
Timestamp: now,
|
||||
Data: stats,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Delete() error {
|
||||
return c.c.Destroy()
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) Processes() ([]runtime.Process, error) {
|
||||
procs := []runtime.Process{
|
||||
c.initProcess,
|
||||
}
|
||||
for _, p := range c.additionalProcesses {
|
||||
procs = append(procs, p)
|
||||
}
|
||||
return procs, nil
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) RemoveProcess(pid int) error {
|
||||
proc, ok := c.additionalProcesses[pid]
|
||||
if !ok {
|
||||
return runtime.ErrNotChildProcess
|
||||
}
|
||||
err := proc.Close()
|
||||
delete(c.additionalProcesses, pid)
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *libcontainerContainer) OOM() (<-chan struct{}, error) {
|
||||
return c.c.NotifyOOM()
|
||||
}
|
||||
|
||||
func NewRuntime(stateDir string) (runtime.Runtime, error) {
|
||||
f, err := libcontainer.New(stateDir, libcontainer.Cgroupfs, func(l *libcontainer.LinuxFactory) error {
|
||||
//l.CriuPath = context.GlobalString("criu")
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &libcontainerRuntime{
|
||||
factory: f,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type libcontainerRuntime struct {
|
||||
factory libcontainer.Factory
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) Type() string {
|
||||
return "libcontainer"
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) Create(id, bundlePath, consolePath string) (runtime.Container, *runtime.IO, error) {
|
||||
spec, rspec, err := r.loadSpec(
|
||||
filepath.Join(bundlePath, "config.json"),
|
||||
filepath.Join(bundlePath, "runtime.json"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
config, err := r.createLibcontainerConfig(id, bundlePath, spec, rspec)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
container, err := r.factory.Create(id, config)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("create container: %v", err)
|
||||
}
|
||||
process, err := r.newProcess(spec.Process)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
var rio runtime.IO
|
||||
if spec.Process.Terminal {
|
||||
if err := process.ConsoleFromPath(consolePath); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
} else {
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
i, err := process.InitializeIO(uid)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
rio.Stdin = i.Stdin
|
||||
rio.Stderr = i.Stderr
|
||||
rio.Stdout = i.Stdout
|
||||
}
|
||||
c := &libcontainerContainer{
|
||||
c: container,
|
||||
additionalProcesses: make(map[int]*libcontainerProcess),
|
||||
initProcess: &libcontainerProcess{
|
||||
process: process,
|
||||
spec: spec.Process,
|
||||
},
|
||||
path: bundlePath,
|
||||
}
|
||||
return c, &rio, nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) StartProcess(ci runtime.Container, p specs.Process, consolePath string) (runtime.Process, *runtime.IO, error) {
|
||||
c, ok := ci.(*libcontainerContainer)
|
||||
if !ok {
|
||||
return nil, nil, runtime.ErrInvalidContainerType
|
||||
}
|
||||
process, err := r.newProcess(p)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
var rio runtime.IO
|
||||
if p.Terminal {
|
||||
if err := process.ConsoleFromPath(consolePath); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
} else {
|
||||
uid, err := c.c.Config().HostUID()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
i, err := process.InitializeIO(uid)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
rio.Stdin = i.Stdin
|
||||
rio.Stderr = i.Stderr
|
||||
rio.Stdout = i.Stdout
|
||||
}
|
||||
if err := c.c.Start(process); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
lp := &libcontainerProcess{
|
||||
process: process,
|
||||
spec: p,
|
||||
}
|
||||
pid, err := process.Pid()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
c.additionalProcesses[pid] = lp
|
||||
return lp, &rio, nil
|
||||
}
|
||||
|
||||
// newProcess returns a new libcontainer Process with the arguments from the
|
||||
// spec and stdio from the current process.
|
||||
func (r *libcontainerRuntime) newProcess(p specs.Process) (*libcontainer.Process, error) {
|
||||
return &libcontainer.Process{
|
||||
Args: p.Args,
|
||||
Env: p.Env,
|
||||
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
||||
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
||||
Cwd: p.Cwd,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// loadSpec loads the specification from the provided path.
|
||||
// If the path is empty then the default path will be "config.json"
|
||||
func (r *libcontainerRuntime) loadSpec(cPath, rPath string) (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) {
|
||||
cf, err := os.Open(cPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil, fmt.Errorf("JSON specification file at %s not found", cPath)
|
||||
}
|
||||
return spec, rspec, err
|
||||
}
|
||||
defer cf.Close()
|
||||
|
||||
rf, err := os.Open(rPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil, fmt.Errorf("JSON runtime config file at %s not found", rPath)
|
||||
}
|
||||
return spec, rspec, err
|
||||
}
|
||||
defer rf.Close()
|
||||
|
||||
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
|
||||
return spec, rspec, fmt.Errorf("unmarshal %s: %v", cPath, err)
|
||||
}
|
||||
if err = json.NewDecoder(rf).Decode(&rspec); err != nil {
|
||||
return spec, rspec, fmt.Errorf("unmarshal %s: %v", rPath, err)
|
||||
}
|
||||
return spec, rspec, r.checkSpecVersion(spec)
|
||||
}
|
||||
|
||||
// checkSpecVersion makes sure that the spec version matches runc's while we are in the initial
|
||||
// development period. It is better to hard fail than have missing fields or options in the spec.
|
||||
func (r *libcontainerRuntime) checkSpecVersion(s *specs.LinuxSpec) error {
|
||||
if s.Version != specs.Version {
|
||||
return fmt.Errorf("spec version is not compatible with implemented version %q: spec %q", specs.Version, s.Version)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) createLibcontainerConfig(cgroupName, bundlePath string, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (*configs.Config, error) {
|
||||
rootfsPath := spec.Root.Path
|
||||
if !filepath.IsAbs(rootfsPath) {
|
||||
rootfsPath = filepath.Join(bundlePath, rootfsPath)
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: rootfsPath,
|
||||
Capabilities: spec.Linux.Capabilities,
|
||||
Readonlyfs: spec.Root.Readonly,
|
||||
Hostname: spec.Hostname,
|
||||
}
|
||||
for _, ns := range rspec.Linux.Namespaces {
|
||||
t, exists := namespaceMapping[ns.Type]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("namespace %q does not exist", ns)
|
||||
}
|
||||
config.Namespaces.Add(t, ns.Path)
|
||||
}
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
config.Networks = []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
},
|
||||
}
|
||||
}
|
||||
for _, mp := range spec.Mounts {
|
||||
m, ok := rspec.Mounts[mp.Name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Mount with Name %q not found in runtime config", mp.Name)
|
||||
}
|
||||
config.Mounts = append(config.Mounts, r.createLibcontainerMount(bundlePath, mp.Path, m))
|
||||
}
|
||||
|
||||
// Convert rootfs propagation flag
|
||||
if rspec.Linux.RootfsPropagation != "" {
|
||||
_, pflags, _ := parseMountOptions([]string{rspec.Linux.RootfsPropagation})
|
||||
if len(pflags) == 1 {
|
||||
config.RootPropagation = pflags[0]
|
||||
}
|
||||
}
|
||||
|
||||
if err := r.createDevices(rspec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := r.setupUserNamespace(rspec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, rlimit := range rspec.Linux.Rlimits {
|
||||
rl, err := r.createLibContainerRlimit(rlimit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Rlimits = append(config.Rlimits, rl)
|
||||
}
|
||||
c, err := r.createCgroupConfig(cgroupName, rspec, config.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Cgroups = c
|
||||
if config.Readonlyfs {
|
||||
r.setReadonly(config)
|
||||
config.MaskPaths = []string{
|
||||
"/proc/kcore",
|
||||
}
|
||||
config.ReadonlyPaths = []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
}
|
||||
}
|
||||
seccomp, err := r.setupSeccomp(&rspec.Linux.Seccomp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Seccomp = seccomp
|
||||
config.Sysctl = rspec.Linux.Sysctl
|
||||
config.ProcessLabel = rspec.Linux.SelinuxProcessLabel
|
||||
config.AppArmorProfile = rspec.Linux.ApparmorProfile
|
||||
for _, g := range spec.Process.User.AdditionalGids {
|
||||
config.AdditionalGroups = append(config.AdditionalGroups, strconv.FormatUint(uint64(g), 10))
|
||||
}
|
||||
r.createHooks(rspec, config)
|
||||
config.Version = specs.Version
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) createLibcontainerMount(cwd, dest string, m specs.Mount) *configs.Mount {
|
||||
flags, pgflags, data := parseMountOptions(m.Options)
|
||||
source := m.Source
|
||||
if m.Type == "bind" {
|
||||
if !filepath.IsAbs(source) {
|
||||
source = filepath.Join(cwd, m.Source)
|
||||
}
|
||||
}
|
||||
return &configs.Mount{
|
||||
Device: m.Type,
|
||||
Source: source,
|
||||
Destination: dest,
|
||||
Data: data,
|
||||
Flags: flags,
|
||||
PropagationFlags: pgflags,
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *libcontainerRuntime) createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) {
|
||||
cr := &configs.Cgroup{
|
||||
Name: name,
|
||||
Parent: "/containerd",
|
||||
}
|
||||
c := &configs.Resources{
|
||||
AllowedDevices: append(devices, allowedDevices...),
|
||||
}
|
||||
cr.Resources = c
|
||||
r := spec.Linux.Resources
|
||||
if r.Memory != nil {
|
||||
if r.Memory.Limit != nil {
|
||||
c.Memory = int64(*r.Memory.Limit)
|
||||
}
|
||||
if r.Memory.Reservation != nil {
|
||||
c.MemoryReservation = int64(*r.Memory.Reservation)
|
||||
}
|
||||
if r.Memory.Swap != nil {
|
||||
c.MemorySwap = int64(*r.Memory.Swap)
|
||||
}
|
||||
if r.Memory.Kernel != nil {
|
||||
c.KernelMemory = int64(*r.Memory.Kernel)
|
||||
}
|
||||
if r.Memory.Swappiness != nil {
|
||||
c.MemorySwappiness = int64(*r.Memory.Swappiness)
|
||||
}
|
||||
}
|
||||
if r.CPU != nil {
|
||||
if r.CPU.Shares != nil {
|
||||
c.CpuShares = int64(*r.CPU.Shares)
|
||||
}
|
||||
if r.CPU.Quota != nil {
|
||||
c.CpuQuota = int64(*r.CPU.Quota)
|
||||
}
|
||||
if r.CPU.Period != nil {
|
||||
c.CpuPeriod = int64(*r.CPU.Period)
|
||||
}
|
||||
if r.CPU.RealtimeRuntime != nil {
|
||||
c.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime)
|
||||
}
|
||||
if r.CPU.RealtimePeriod != nil {
|
||||
c.CpuRtPeriod = int64(*r.CPU.RealtimePeriod)
|
||||
}
|
||||
if r.CPU.Cpus != nil {
|
||||
c.CpusetCpus = *r.CPU.Cpus
|
||||
}
|
||||
if r.CPU.Mems != nil {
|
||||
c.CpusetMems = *r.CPU.Mems
|
||||
}
|
||||
}
|
||||
if r.BlockIO != nil {
|
||||
if r.BlockIO.Weight != nil {
|
||||
c.BlkioWeight = *r.BlockIO.Weight
|
||||
}
|
||||
if r.BlockIO.LeafWeight != nil {
|
||||
c.BlkioLeafWeight = *r.BlockIO.LeafWeight
|
||||
}
|
||||
}
|
||||
for _, wd := range r.BlockIO.WeightDevice {
|
||||
weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, *wd.Weight, *wd.LeafWeight)
|
||||
c.BlkioWeightDevice = append(c.BlkioWeightDevice, weightDevice)
|
||||
}
|
||||
for _, td := range r.BlockIO.ThrottleReadBpsDevice {
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
||||
c.BlkioThrottleReadBpsDevice = append(c.BlkioThrottleReadBpsDevice, throttleDevice)
|
||||
}
|
||||
for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
||||
c.BlkioThrottleWriteBpsDevice = append(c.BlkioThrottleWriteBpsDevice, throttleDevice)
|
||||
}
|
||||
for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
||||
c.BlkioThrottleReadIOPSDevice = append(c.BlkioThrottleReadIOPSDevice, throttleDevice)
|
||||
}
|
||||
for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
||||
c.BlkioThrottleWriteIOPSDevice = append(c.BlkioThrottleWriteIOPSDevice, throttleDevice)
|
||||
}
|
||||
for _, l := range r.HugepageLimits {
|
||||
c.HugetlbLimit = append(c.HugetlbLimit, &configs.HugepageLimit{
|
||||
Pagesize: *l.Pagesize,
|
||||
Limit: *l.Limit,
|
||||
})
|
||||
}
|
||||
c.OomKillDisable = r.DisableOOMKiller != nil && *r.DisableOOMKiller
|
||||
if r.Network != nil {
|
||||
c.NetClsClassid = r.Network.ClassID
|
||||
for _, m := range r.Network.Priorities {
|
||||
c.NetPrioIfpriomap = append(c.NetPrioIfpriomap, &configs.IfPrioMap{
|
||||
Interface: m.Name,
|
||||
Priority: int64(m.Priority),
|
||||
})
|
||||
}
|
||||
}
|
||||
return cr, nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) createDevices(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
|
||||
for _, d := range spec.Linux.Devices {
|
||||
device := &configs.Device{
|
||||
Type: d.Type,
|
||||
Path: d.Path,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
Permissions: d.Permissions,
|
||||
FileMode: d.FileMode,
|
||||
Uid: d.UID,
|
||||
Gid: d.GID,
|
||||
}
|
||||
config.Devices = append(config.Devices, device)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) setReadonly(config *configs.Config) {
|
||||
for _, m := range config.Mounts {
|
||||
if m.Device == "sysfs" {
|
||||
m.Flags |= syscall.MS_RDONLY
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) setupUserNamespace(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
|
||||
if len(spec.Linux.UIDMappings) == 0 {
|
||||
return nil
|
||||
}
|
||||
config.Namespaces.Add(configs.NEWUSER, "")
|
||||
create := func(m specs.IDMapping) configs.IDMap {
|
||||
return configs.IDMap{
|
||||
HostID: int(m.HostID),
|
||||
ContainerID: int(m.ContainerID),
|
||||
Size: int(m.Size),
|
||||
}
|
||||
}
|
||||
for _, m := range spec.Linux.UIDMappings {
|
||||
config.UidMappings = append(config.UidMappings, create(m))
|
||||
}
|
||||
for _, m := range spec.Linux.GIDMappings {
|
||||
config.GidMappings = append(config.GidMappings, create(m))
|
||||
}
|
||||
rootUID, err := config.HostUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rootGID, err := config.HostGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, node := range config.Devices {
|
||||
node.Uid = uint32(rootUID)
|
||||
node.Gid = uint32(rootGID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
|
||||
rl, err := strToRlimit(rlimit.Type)
|
||||
if err != nil {
|
||||
return configs.Rlimit{}, err
|
||||
}
|
||||
return configs.Rlimit{
|
||||
Type: rl,
|
||||
Hard: uint64(rlimit.Hard),
|
||||
Soft: uint64(rlimit.Soft),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// parseMountOptions parses the string and returns the flags, propagation
|
||||
// flags and any mount data that it contains.
|
||||
func parseMountOptions(options []string) (int, []int, string) {
|
||||
var (
|
||||
flag int
|
||||
pgflag []int
|
||||
data []string
|
||||
)
|
||||
flags := map[string]struct {
|
||||
clear bool
|
||||
flag int
|
||||
}{
|
||||
"async": {true, syscall.MS_SYNCHRONOUS},
|
||||
"atime": {true, syscall.MS_NOATIME},
|
||||
"bind": {false, syscall.MS_BIND},
|
||||
"defaults": {false, 0},
|
||||
"dev": {true, syscall.MS_NODEV},
|
||||
"diratime": {true, syscall.MS_NODIRATIME},
|
||||
"dirsync": {false, syscall.MS_DIRSYNC},
|
||||
"exec": {true, syscall.MS_NOEXEC},
|
||||
"mand": {false, syscall.MS_MANDLOCK},
|
||||
"noatime": {false, syscall.MS_NOATIME},
|
||||
"nodev": {false, syscall.MS_NODEV},
|
||||
"nodiratime": {false, syscall.MS_NODIRATIME},
|
||||
"noexec": {false, syscall.MS_NOEXEC},
|
||||
"nomand": {true, syscall.MS_MANDLOCK},
|
||||
"norelatime": {true, syscall.MS_RELATIME},
|
||||
"nostrictatime": {true, syscall.MS_STRICTATIME},
|
||||
"nosuid": {false, syscall.MS_NOSUID},
|
||||
"rbind": {false, syscall.MS_BIND | syscall.MS_REC},
|
||||
"relatime": {false, syscall.MS_RELATIME},
|
||||
"remount": {false, syscall.MS_REMOUNT},
|
||||
"ro": {false, syscall.MS_RDONLY},
|
||||
"rw": {true, syscall.MS_RDONLY},
|
||||
"strictatime": {false, syscall.MS_STRICTATIME},
|
||||
"suid": {true, syscall.MS_NOSUID},
|
||||
"sync": {false, syscall.MS_SYNCHRONOUS},
|
||||
}
|
||||
propagationFlags := map[string]struct {
|
||||
clear bool
|
||||
flag int
|
||||
}{
|
||||
"private": {false, syscall.MS_PRIVATE},
|
||||
"shared": {false, syscall.MS_SHARED},
|
||||
"slave": {false, syscall.MS_SLAVE},
|
||||
"unbindable": {false, syscall.MS_UNBINDABLE},
|
||||
"rprivate": {false, syscall.MS_PRIVATE | syscall.MS_REC},
|
||||
"rshared": {false, syscall.MS_SHARED | syscall.MS_REC},
|
||||
"rslave": {false, syscall.MS_SLAVE | syscall.MS_REC},
|
||||
"runbindable": {false, syscall.MS_UNBINDABLE | syscall.MS_REC},
|
||||
}
|
||||
for _, o := range options {
|
||||
// If the option does not exist in the flags table or the flag
|
||||
// is not supported on the platform,
|
||||
// then it is a data value for a specific fs type
|
||||
if f, exists := flags[o]; exists && f.flag != 0 {
|
||||
if f.clear {
|
||||
flag &= ^f.flag
|
||||
} else {
|
||||
flag |= f.flag
|
||||
}
|
||||
} else if f, exists := propagationFlags[o]; exists && f.flag != 0 {
|
||||
pgflag = append(pgflag, f.flag)
|
||||
} else {
|
||||
data = append(data, o)
|
||||
}
|
||||
}
|
||||
return flag, pgflag, strings.Join(data, ",")
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) setupSeccomp(config *specs.Seccomp) (*configs.Seccomp, error) {
|
||||
if config == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// No default action specified, no syscalls listed, assume seccomp disabled
|
||||
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
newConfig := new(configs.Seccomp)
|
||||
newConfig.Syscalls = []*configs.Syscall{}
|
||||
|
||||
if len(config.Architectures) > 0 {
|
||||
newConfig.Architectures = []string{}
|
||||
for _, arch := range config.Architectures {
|
||||
newArch, err := seccomp.ConvertStringToArch(string(arch))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newConfig.Architectures = append(newConfig.Architectures, newArch)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert default action from string representation
|
||||
newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newConfig.DefaultAction = newDefaultAction
|
||||
|
||||
// Loop through all syscall blocks and convert them to libcontainer format
|
||||
for _, call := range config.Syscalls {
|
||||
newAction, err := seccomp.ConvertStringToAction(string(call.Action))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newCall := configs.Syscall{
|
||||
Name: call.Name,
|
||||
Action: newAction,
|
||||
Args: []*configs.Arg{},
|
||||
}
|
||||
|
||||
// Loop through all the arguments of the syscall and convert them
|
||||
for _, arg := range call.Args {
|
||||
newOp, err := seccomp.ConvertStringToOperator(string(arg.Op))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newArg := configs.Arg{
|
||||
Index: arg.Index,
|
||||
Value: arg.Value,
|
||||
ValueTwo: arg.ValueTwo,
|
||||
Op: newOp,
|
||||
}
|
||||
|
||||
newCall.Args = append(newCall.Args, &newArg)
|
||||
}
|
||||
|
||||
newConfig.Syscalls = append(newConfig.Syscalls, &newCall)
|
||||
}
|
||||
|
||||
return newConfig, nil
|
||||
}
|
||||
|
||||
func (r *libcontainerRuntime) createHooks(rspec *specs.LinuxRuntimeSpec, config *configs.Config) {
|
||||
config.Hooks = &configs.Hooks{}
|
||||
for _, h := range rspec.Hooks.Prestart {
|
||||
cmd := configs.Command{
|
||||
Path: h.Path,
|
||||
Args: h.Args,
|
||||
Env: h.Env,
|
||||
}
|
||||
config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd))
|
||||
}
|
||||
for _, h := range rspec.Hooks.Poststop {
|
||||
cmd := configs.Command{
|
||||
Path: h.Path,
|
||||
Args: h.Args,
|
||||
Env: h.Env,
|
||||
}
|
||||
config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd))
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue