From c76f883ccd3a1e070dc2484193578c3960dfec50 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 5 Oct 2016 15:07:20 -0700 Subject: [PATCH] Finish port of shim package Signed-off-by: Michael Crosby --- container.go | 5 + containerd-shim/main.go | 6 + oci/oci.go | 10 ++ shim/process.go | 264 +++++++++++++++++++++++++++++++++++++--- shim/shim.go | 229 +++++++++++++++++++++++----------- 5 files changed, 424 insertions(+), 90 deletions(-) diff --git a/container.go b/container.go index f8f8854..4ae79d7 100644 --- a/container.go +++ b/container.go @@ -126,6 +126,11 @@ func (c *Container) Path() string { return c.path } +// Spec returns the OCI runtime spec for the container +func (c *Container) Spec() *specs.Spec { + return c.s +} + // Create will create the container on the system by running the runtime's // initial setup and process waiting for the user process to be started func (c *Container) Create() error { diff --git a/containerd-shim/main.go b/containerd-shim/main.go index 0d69363..1cea67e 100644 --- a/containerd-shim/main.go +++ b/containerd-shim/main.go @@ -139,6 +139,7 @@ func start(log *os.File) error { p.stdinCloser.Close() } case 1: + // resize if p.console == nil { continue } @@ -147,6 +148,11 @@ func start(log *os.File) error { Height: uint16(msg.Height), } term.SetWinsize(p.console.Fd(), &ws) + case 2: + // signal + if err := syscall.Kill(p.pid(), msg.Width); err != nil { + writeMessage(log, "warn", err) + } } } } diff --git a/oci/oci.go b/oci/oci.go index 06a2417..ed60325 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -2,6 +2,7 @@ package oci import ( "encoding/json" + "errors" "fmt" "io/ioutil" "os" @@ -13,6 +14,8 @@ import ( "github.com/docker/containerkit" ) +var ErrRootEmpty = errors.New("oci: runtime root cannot be an empty string") + type Opts struct { Name string Root string @@ -20,6 +23,9 @@ type Opts struct { } func New(opts Opts) (*OCIRuntime, error) { + if opts.Root == "" { + return nil, ErrRootEmpty + } if err := os.MkdirAll(opts.Root, 0711); err != nil { return nil, err } @@ -44,6 +50,10 @@ func (r *OCIRuntime) Name() string { return r.name } +func (r *OCIRuntime) Args() []string { + return r.args +} + func (r *OCIRuntime) Create(c *containerkit.Container) (containerkit.ProcessDelegate, error) { pidFile := fmt.Sprintf("%s/%s.pid", filepath.Join(r.root, c.ID()), "init") cmd := r.Command("create", "--pid-file", pidFile, "--bundle", c.Path(), c.ID()) diff --git a/shim/process.go b/shim/process.go index 1a7da1f..aa64bec 100644 --- a/shim/process.go +++ b/shim/process.go @@ -1,4 +1,4 @@ -package process +package shim import ( "encoding/json" @@ -16,13 +16,90 @@ import ( "time" "github.com/Sirupsen/logrus" + "github.com/docker/containerkit" + specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" ) -var errInvalidPidInt = errors.New("containerd: process pid is invalid") +var ( + ErrContainerStartTimeout = errors.New("shim: container did not start before the specified timeout") + ErrContainerNotStarted = errors.New("shim: container not started") + ErrProcessNotExited = errors.New("containerd: process has not exited") + ErrShimExited = errors.New("containerd: shim exited before container process was started") + errInvalidPidInt = errors.New("shim: process pid is invalid") +) +const UnknownStatus = 255 + +func newProcess(root string, noPivotRoot bool, checkpoint string, c *containerkit.Container, cmd *exec.Cmd) (*process, error) { + var ( + spec = c.Spec() + stdin, stdout, stderr string + ) + uid, gid, err := getRootIDs(spec) + if err != nil { + return nil, err + } + for _, t := range []struct { + path *string + v interface{} + }{ + { + path: &stdin, + v: c.Stdin, + }, + { + path: &stdout, + v: c.Stdout, + }, + { + path: &stderr, + v: c.Stderr, + }, + } { + p, err := getFifoPath(t.v) + if err != nil { + return nil, err + } + *t.path = p + } + p := &process{ + root: root, + cmd: cmd, + done: make(chan struct{}), + spec: spec.Process, + exec: false, + rootUid: uid, + rootGid: gid, + noPivotRoot: noPivotRoot, + checkpoint: checkpoint, + stdin: stdin, + stdout: stdout, + stderr: stderr, + } + f, err := os.Create(filepath.Join(root, "process.json")) + if err != nil { + return nil, err + } + err = json.NewEncoder(f).Encode(p) + f.Close() + if err != nil { + return nil, err + } + exit, err := getExitPipe(filepath.Join(root, "exit")) + if err != nil { + return nil, err + } + control, err := getControlPipe(filepath.Join(root, "control")) + if err != nil { + return nil, err + } + p.exit, p.control = exit, control + return p, nil +} + +// TODO: control and exit fifo type process struct { - name string root string cmd *exec.Cmd done chan struct{} @@ -30,7 +107,100 @@ type process struct { startTime string mu sync.Mutex containerPid int - timeout time.Duration + exit *os.File + control *os.File + + spec specs.Process + noPivotRoot bool + exec bool + rootUid int + rootGid int + checkpoint string + stdin string + stdout string + stderr string +} + +type processState struct { + specs.Process + Exec bool `json:"exec"` + RootUID int `json:"rootUID"` + RootGID int `json:"rootGID"` + Checkpoint string `json:"checkpoint"` + NoPivotRoot bool `json:"noPivotRoot"` + RuntimeArgs []string `json:"runtimeArgs"` + // Stdin fifo filepath + Stdin string `json:"stdin"` + // Stdout fifo filepath + Stdout string `json:"stdout"` + // Stderr fifo filepath + Stderr string `json:"stderr"` +} + +func (p *process) MarshalJSON() ([]byte, error) { + ps := processState{ + Process: p.spec, + NoPivotRoot: p.noPivotRoot, + Checkpoint: p.checkpoint, + RootUID: p.rootUid, + RootGID: p.rootGid, + Exec: p.exec, + Stdin: p.stdin, + Stdout: p.stdout, + Stderr: p.stderr, + } + return json.Marshal(ps) +} + +func (p *process) UnmarshalJSON(b []byte) error { + var ps processState + if err := json.Unmarshal(b, &ps); err != nil { + return err + } + p.spec = ps.Process + p.noPivotRoot = ps.NoPivotRoot + p.rootGid = ps.RootGID + p.rootUid = ps.RootUID + p.checkpoint = ps.Checkpoint + p.exec = ps.Exec + p.stdin = ps.Stdin + p.stdout = ps.Stdout + p.stderr = ps.Stderr + // TODO: restore pid/exit/control and stuff ? + return nil +} + +func (p *process) Pid() int { + return p.containerPid +} + +func (p *process) Wait() (rst uint32, rerr error) { + var b []byte + if _, err := p.exit.Read(b); err != nil { + return 255, err + } + data, err := ioutil.ReadFile(filepath.Join(p.root, "exitStatus")) + defer func() { + if rerr != nil { + rst, rerr = p.handleSigkilledShim(rst, rerr) + } + }() + if err != nil { + if os.IsNotExist(err) { + return UnknownStatus, ErrProcessNotExited + } + return UnknownStatus, err + } + if len(data) == 0 { + return UnknownStatus, ErrProcessNotExited + } + i, err := strconv.ParseUint(string(data), 10, 32) + return uint32(i), err +} + +func (p *process) Signal(s os.Signal) error { + _, err := fmt.Fprintf(p.control, "%d %d %d\n", 2, s, 0) + return err } // same checks if the process is the same process originally launched @@ -39,7 +209,7 @@ func (p *process) same() (bool, error) { if p.startTime == "" { return true, nil } - pid, err := p.readContainerPid() + pid, err := readPid(filepath.Join(p.root, "pid")) if err != nil { return false, nil } @@ -57,15 +227,15 @@ func (p *process) checkExited() { } if same, _ := p.same(); same && p.hasPid() { // The process changed its PR_SET_PDEATHSIG, so force kill it - logrus.Infof("containerd: %s:%s (pid %v) has become an orphan, killing it", p.container.id, p.namae, p.containerPid) + logrus.Infof("containerd: (pid %v) has become an orphan, killing it", p.containerPid) if err := unix.Kill(p.containerPid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { - logrus.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.name, p.containerPid, err) + logrus.Errorf("containerd: unable to SIGKILL (pid %v): %v", p.containerPid, err) close(p.done) return } // wait for the container process to exit for { - if err := unix.Kill(p.pid, 0); err != nil { + if err := unix.Kill(p.containerPid, 0); err != nil { break } time.Sleep(5 * time.Millisecond) @@ -92,9 +262,9 @@ type pidResponse struct { err error } -func (p *process) waitForCreate() error { +func (p *process) waitForCreate(timeout time.Duration) error { r := make(chan pidResponse, 1) - go readContainerPid(wc) + go p.readContainerPid(r) select { case resp := <-r: @@ -104,24 +274,27 @@ func (p *process) waitForCreate() error { p.setPid(resp.pid) started, err := readProcessStartTime(resp.pid) if err != nil { - logrus.Warnf("containerd: unable to save %s:%s starttime: %v", p.container.id, p.id, err) + logrus.Warnf("containerd: unable to save starttime: %v", err) } // TODO: save start time to disk or process state file p.startTime = started return nil - case <-time.After(c.timeout): + case <-time.After(timeout): p.cmd.Process.Kill() p.cmd.Wait() return ErrContainerStartTimeout } } -func readContainerPid(r chan pidResponse, pidFile string) { +func (p *process) readContainerPid(r chan pidResponse) { + pidFile := filepath.Join(p.root, "pid") for { - pid, err := readContainerPid(pidFile) + pid, err := readPid(pidFile) if err != nil { if os.IsNotExist(err) || err == errInvalidPidInt { - if serr := checkErrorLogs(); serr != nil { + if serr := checkErrorLogs(p.cmd, + filepath.Join(p.root, "shim-log.json"), + filepath.Join(p.root, "log.json")); serr != nil { r <- pidResponse{ err: err, } @@ -142,6 +315,45 @@ func readContainerPid(r chan pidResponse, pidFile string) { } } +func (p *process) handleSigkilledShim(rst uint32, rerr error) (uint32, error) { + if err := unix.Kill(p.containerPid, 0); err == syscall.ESRCH { + logrus.Warnf("containerd: (pid %d) does not exist", p.containerPid) + // The process died while containerd was down (probably of + // SIGKILL, but no way to be sure) + return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus) + } + + // If it's not the same process, just mark it stopped and set + // the status to the UnknownStatus value (i.e. 255) + if same, _ := p.same(); !same { + // Create the file so we get the exit event generated once monitor kicks in + // without having to go through all this process again + return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus) + } + ppid, err := readProcStatField(p.containerPid, 4) + if err != nil { + return rst, fmt.Errorf("could not check process ppid: %v (%v)", err, rerr) + } + if ppid == "1" { + if err := unix.Kill(p.containerPid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { + return UnknownStatus, fmt.Errorf( + "containerd: unable to SIGKILL (pid %v): %v", p.containerPid, err) + } + // wait for the process to die + for { + if err := unix.Kill(p.containerPid, 0); err == syscall.ESRCH { + break + } + time.Sleep(5 * time.Millisecond) + } + // Create the file so we get the exit event generated once monitor kicks in + // without having to go through all this process again + status := 128 + uint32(syscall.SIGKILL) + return status, writeExitStatus(filepath.Join(p.root, "exitStatus"), status) + } + return rst, rerr +} + func checkErrorLogs(cmd *exec.Cmd, shimLogPath, runtimeLogPath string) error { alive, err := isAlive(cmd) if err != nil { @@ -200,7 +412,7 @@ func readProcStatField(pid int, field int) (string, error) { return strings.Split(parts[1], ") ")[0], nil } -func readContainerPid(pidFile string) (int, error) { +func readPid(pidFile string) (int, error) { data, err := ioutil.ReadFile(pidFile) if err != nil { return -1, nil @@ -251,3 +463,23 @@ func readLogMessages(path string) ([]message, error) { } return out, nil } + +func getExitPipe(path string) (*os.File, error) { + if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) { + return nil, err + } + // add NONBLOCK in case the other side has already closed or else + // this function would never return + return os.OpenFile(path, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) +} + +func getControlPipe(path string) (*os.File, error) { + if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) { + return nil, err + } + return os.OpenFile(path, syscall.O_RDWR|syscall.O_NONBLOCK, 0) +} + +func writeExitStatus(path string, status uint32) error { + return ioutil.WriteFile(path, []byte(fmt.Sprintf("%u", status)), 0644) +} diff --git a/shim/shim.go b/shim/shim.go index 6b05302..09f92b2 100644 --- a/shim/shim.go +++ b/shim/shim.go @@ -2,6 +2,7 @@ package shim import ( "encoding/json" + "errors" "fmt" "os" "os/exec" @@ -12,6 +13,7 @@ import ( "github.com/docker/containerd/oci" "github.com/docker/containerkit" + specs "github.com/opencontainers/runtime-spec/specs-go" ) /* @@ -27,57 +29,64 @@ import ( │   │   │   ├── shim-log.json │   │   │   └── starttime │   │   └── state.json -│   └── ff2e86955c2be43f0e3c300fbd3786599301bd8efcaa5a386587f132e73af242 -│   ├── config.json -│   ├── init-stderr -│   ├── init-stdin -│   └── init-stdout */ -type Opts struct { - Name string - RuntimeName string - RuntimeLogFile string - RuntimeArgs []string - Root string - Timeout time.Duration -} +var ( + ErrNotFifo = errors.New("shim: IO is not a valid fifo on disk") + errInitProcessNotExist = errors.New("shim: init process does not exist") +) -type state struct { - Bundle string `json:"bundle"` - Stdin string `json:"stdin"` - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - Runtime string `json:"runtime"` - RuntimeArgs []string `json:"runtimeArgs"` - Shim string `json:"shim"` - NoPivotRoot bool `json:"noPivotRoot"` +type Opts struct { + Name string + RuntimeName string + RuntimeArgs []string + NoPivotRoot bool + Root string + Timeout time.Duration } func New(opts Opts) (*Shim, error) { - if err := os.MkdirAll(opts.Root, 0711); err != nil { + if err := os.Mkdir(opts.Root, 0711); err != nil { return nil, err } r, err := oci.New(oci.Opts{ - Name: opts.RuntimeName, - LogFile: opts.RuntimeLogFile, - Args: opts.RuntimeArgs, + Name: opts.RuntimeName, + Args: opts.RuntimeArgs, }) if err != nil { return nil, err } - return &Shim{ - root: opts.Root, - name: opts.Name, - timeout: opts.Timeout, - runtime: r, - }, nil + s := &Shim{ + root: opts.Root, + name: opts.Name, + timeout: opts.Timeout, + runtime: r, + processes: make(map[string]*process), + } + f, err := os.Create(filepath.Join(opts.Root, "state.json")) + if err != nil { + return nil, err + } + err = json.NewEncoder(f).Encode(s) + f.Close() + return s, err } // Load will load an existing shim with all its information restored from the // provided path -func Load(path string) (*Shim, error) { - +func Load(root string) (*Shim, error) { + f, err := os.Open(filepath.Join(root, "state.json")) + if err != nil { + return nil, err + } + var s Shim + err = json.NewDecoder(f).Decode(&s) + f.Close() + if err != nil { + return nil, err + } + // TODO: read processes into memory + return &s, nil } // Shim is a container runtime that adds a shim process as the container's parent @@ -88,50 +97,82 @@ func Load(path string) (*Shim, error) { type Shim struct { // root holds runtime state information for the containers // launched by the runtime - root string - // name is the name of the runtime, i.e. runc - name string - timeout time.Duration + root string + name string + timeout time.Duration + noPivotRoot bool + runtime *oci.OCIRuntime + pmu sync.Mutex + processes map[string]*process + bundle string + checkpoint string +} - runtime *oci.OCIRuntime - pmu sync.Mutex - initProcesses map[string]*process +type state struct { + // Bundle is the path to the container's bundle + Bundle string `json:"bundle"` + // OCI runtime binary name + Runtime string `json:"runtime"` + // OCI runtime args + RuntimeArgs []string `json:"runtimeArgs"` + // Shim binary name + Name string `json:"shim"` + /// NoPivotRoot option + NoPivotRoot bool `json:"noPivotRoot"` + // Timeout for container start + Timeout time.Duration `json:"timeout"` +} + +func (s *Shim) MarshalJSON() ([]byte, error) { + st := state{ + Name: s.name, + Bundle: s.bundle, + Runtime: s.runtime.Name(), + RuntimeArgs: s.runtime.Args(), + NoPivotRoot: s.noPivotRoot, + Timeout: s.timeout, + } + return json.Marshal(st) +} + +func (s *Shim) UnmarshalJSON(b []byte) error { + var st state + if err := json.Unmarshal(b, &st); err != nil { + return err + } + s.name = st.Name + s.bundle = st.Bundle + s.timeout = st.Timeout + s.noPivotRoot = st.NoPivotRoot + r, err := oci.New(oci.Opts{ + Name: st.Runtime, + Args: st.RuntimeArgs, + }) + if err != nil { + return err + } + s.runtime = r + return nil } func (s *Shim) Create(c *containerkit.Container) (containerkit.ProcessDelegate, error) { - if err := os.Mkdir(filepath.Join(c.root, c.id), 0711); err != nil { - return nil, err - } - f, err := os.Create(filepath.Join(c.root, c.id, StateFile)) - if err != nil { - return nil, err - } - err = json.NewEncoder(f).Encode(state{ - Bundle: c.bundle, - Labels: c.labels, - Runtime: c.runtime, - RuntimeArgs: c.runtimeArgs, - Shim: c.shim, - NoPivotRoot: opts.NoPivotRoot, - }) - f.Close() - if err != nil { - return nil, err - } - cmd := s.command(c.ID(), c.Path(), s.runtime.Name()) + var ( + root = filepath.Join(s.root, "init") + cmd = s.command(c.ID(), c.Path(), s.runtime.Name()) + ) // exec the shim inside the state directory setup with the process // information for what is being run - cmd.Dir = processRoot + cmd.Dir = root // make sure the shim is in a new process group cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } - p, err := s.startCommand("init", cmd) + p, err := s.startCommand(c, cmd) if err != nil { return nil, err } s.pmu.Lock() - s.initProcesses[c.ID()] = p + s.processes["init"] = p s.pmu.Unlock() // ~TODO: oom and stats stuff here return p, nil @@ -176,7 +217,7 @@ func (s *Shim) Start(c *containerkit.Container) error { func (s *Shim) getContainerInit(c *containerkit.Container) (*process, error) { s.pmu.Lock() - p, ok := s.initProcesses[c.ID()] + p, ok := s.processes["init"] s.pmu.Unlock() if !ok { return nil, errInitProcessNotExist @@ -184,17 +225,15 @@ func (s *Shim) getContainerInit(c *containerkit.Container) (*process, error) { return p, nil } -func (s *Shim) startCommand(processName string, cmd *exec.Cmd) (*process, error) { - p := &process{ - name: processName, - cmd: cmd, - done: make(chan struct{}), - timeout: s.timeout, +func (s *Shim) startCommand(c *containerkit.Container, cmd *exec.Cmd) (*process, error) { + p, err := newProcess(filepath.Join(s.root, "init"), s.noPivotRoot, s.checkpoint, c, cmd) + if err != nil { + return nil, err } if err := cmd.Start(); err != nil { - close(proc.done) + close(p.done) if checkShimNotFound(err) { - return fmt.Errorf("%s not install on system", s.name) + return nil, fmt.Errorf("%s not install on system", s.name) } return nil, err } @@ -202,7 +241,7 @@ func (s *Shim) startCommand(processName string, cmd *exec.Cmd) (*process, error) defer func() { go p.checkExited() }() - if err := p.waitForCreate(); err != nil { + if err := p.waitForCreate(s.timeout); err != nil { return nil, err } return p, nil @@ -221,3 +260,45 @@ func checkShimNotFound(err error) bool { } return false } + +// getFifoPath returns the path to the fifo on disk as long as the provided +// interface is an *os.File and has a valid path on the Name() method call +func getFifoPath(v interface{}) (string, error) { + f, ok := v.(*os.File) + if !ok { + return "", ErrNotFifo + } + p := f.Name() + if p == "" { + return "", ErrNotFifo + } + return p, nil +} + +func getRootIDs(s *specs.Spec) (int, int, error) { + if s == nil { + return 0, 0, nil + } + var hasUserns bool + for _, ns := range s.Linux.Namespaces { + if ns.Type == specs.UserNamespace { + hasUserns = true + break + } + } + if !hasUserns { + return 0, 0, nil + } + uid := hostIDFromMap(0, s.Linux.UIDMappings) + gid := hostIDFromMap(0, s.Linux.GIDMappings) + return uid, gid, nil +} + +func hostIDFromMap(id uint32, mp []specs.IDMapping) int { + for _, m := range mp { + if (id >= m.ContainerID) && (id <= (m.ContainerID + m.Size - 1)) { + return int(m.HostID + (id - m.ContainerID)) + } + } + return 0 +}