diff --git a/execution/executors/shim/containerd-shim/console.go b/execution/executors/shim/containerd-shim/console.go deleted file mode 100644 index 1d3262d..0000000 --- a/execution/executors/shim/containerd-shim/console.go +++ /dev/null @@ -1,56 +0,0 @@ -// +build !solaris - -package main - -import ( - "fmt" - "os" - "syscall" - "unsafe" -) - -// NewConsole returns an initialized console that can be used within a container by copying bytes -// from the master side to the slave that is attached as the tty for the container's init process. -func newConsole(uid, gid int) (*os.File, string, error) { - master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) - if err != nil { - return nil, "", err - } - console, err := ptsname(master) - if err != nil { - return nil, "", err - } - if err := unlockpt(master); err != nil { - return nil, "", err - } - if err := os.Chmod(console, 0600); err != nil { - return nil, "", err - } - if err := os.Chown(console, uid, gid); err != nil { - return nil, "", err - } - return master, console, nil -} - -func ioctl(fd uintptr, flag, data uintptr) error { - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { - return err - } - return nil -} - -// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. -// unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { - var u int32 - return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) -} - -// ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { - var n int32 - if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { - return "", err - } - return fmt.Sprintf("/dev/pts/%d", n), nil -} diff --git a/execution/executors/shim/containerd-shim/console_solaris.go b/execution/executors/shim/containerd-shim/console_solaris.go deleted file mode 100644 index 37b3368..0000000 --- a/execution/executors/shim/containerd-shim/console_solaris.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build solaris - -package main - -import ( - "errors" - "os" -) - -// NewConsole returns an initalized console that can be used within a container by copying bytes -// from the master side to the slave that is attached as the tty for the container's init process. -func newConsole(uid, gid int) (*os.File, string, error) { - return nil, "", errors.New("newConsole not implemented on Solaris") -} diff --git a/execution/executors/shim/containerd-shim/example/config.json b/execution/executors/shim/containerd-shim/example/config.json deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/exit b/execution/executors/shim/containerd-shim/example/init/exit deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/pid b/execution/executors/shim/containerd-shim/example/init/pid deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/process.json b/execution/executors/shim/containerd-shim/example/init/process.json deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/resize b/execution/executors/shim/containerd-shim/example/init/resize deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/stderr b/execution/executors/shim/containerd-shim/example/init/stderr deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/stdin b/execution/executors/shim/containerd-shim/example/init/stdin deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/init/stdout b/execution/executors/shim/containerd-shim/example/init/stdout deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/exit b/execution/executors/shim/containerd-shim/example/logger/exit deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/pid b/execution/executors/shim/containerd-shim/example/logger/pid deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/process.json b/execution/executors/shim/containerd-shim/example/logger/process.json deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/resize b/execution/executors/shim/containerd-shim/example/logger/resize deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/stderr b/execution/executors/shim/containerd-shim/example/logger/stderr deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/stdin b/execution/executors/shim/containerd-shim/example/logger/stdin deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/example/logger/stdout b/execution/executors/shim/containerd-shim/example/logger/stdout deleted file mode 100644 index e69de29..0000000 diff --git a/execution/executors/shim/containerd-shim/main.go b/execution/executors/shim/containerd-shim/main.go deleted file mode 100644 index 71948af..0000000 --- a/execution/executors/shim/containerd-shim/main.go +++ /dev/null @@ -1,169 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "os" - "os/signal" - "path/filepath" - "syscall" - - "github.com/docker/containerd/sys" - "github.com/docker/docker/pkg/term" -) - -func writeMessage(f *os.File, level string, err error) { - fmt.Fprintf(f, `{"level": "%s","msg": "%s"}`, level, err) -} - -type controlMessage struct { - Type int - Width int - Height int -} - -// containerd-shim is a small shim that sits in front of a runtime implementation -// that allows it to be repartented to init and handle reattach from the caller. -// -// the cwd of the shim should be the path to the state directory where the shim -// can locate fifos and other information. -// Arg0: id of the container -// Arg1: bundle path -// Arg2: runtime binary -func main() { - flag.Parse() - cwd, err := os.Getwd() - if err != nil { - panic(err) - } - f, err := os.OpenFile(filepath.Join(cwd, "shim-log.json"), os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666) - if err != nil { - panic(err) - } - if err := start(f); err != nil { - // this means that the runtime failed starting the container and will have the - // proper error messages in the runtime log so we should to treat this as a - // shim failure because the sim executed properly - if err == errRuntime { - f.Close() - return - } - // log the error instead of writing to stderr because the shim will have - // /dev/null as it's stdio because it is supposed to be reparented to system - // init and will not have anyone to read from it - writeMessage(f, "error", err) - f.Close() - os.Exit(1) - } -} - -func start(log *os.File) error { - // start handling signals as soon as possible so that things are properly reaped - // or if runtime exits before we hit the handler - signals := make(chan os.Signal, 2048) - signal.Notify(signals) - // set the shim as the subreaper for all orphaned processes created by the container - if err := sys.SetSubreaper(1); err != nil { - return err - } - // open the exit pipe - f, err := os.OpenFile("exit", syscall.O_WRONLY, 0) - if err != nil { - return fmt.Errorf("open exit fifo %s", err) - } - defer f.Close() - control, err := os.OpenFile("control", syscall.O_RDWR, 0) - if err != nil { - return fmt.Errorf("open control fifo %s", err) - } - defer control.Close() - p, err := newProcess(flag.Arg(0), flag.Arg(1), flag.Arg(2)) - if err != nil { - return err - } - defer func() { - if err := p.Close(); err != nil { - writeMessage(log, "warn", fmt.Errorf("close stdio %s", err)) - } - }() - if err := p.create(); err != nil { - p.delete() - return err - } - msgC := make(chan controlMessage, 32) - go func() { - for { - var m controlMessage - if _, err := fmt.Fscanf(control, "%d %d %d\n", &m.Type, &m.Width, &m.Height); err != nil { - continue - } - msgC <- m - } - }() - var exitShim bool - for { - select { - case s := <-signals: - switch s { - case syscall.SIGCHLD: - exits, _ := sys.Reap(false) - for _, e := range exits { - // check to see if runtime is one of the processes that has exited - if e.Pid == p.pid() { - exitShim = true - writeInt("exitStatus", e.Status) - } - } - } - // runtime has exited so the shim can also exit - if exitShim { - // Let containerd take care of calling the runtime - // delete. - // This is needed to be done first in order to ensure - // that the call to Reap does not block until all - // children of the container have died if init was not - // started in its own PID namespace. - f.Close() - // Wait for all the childs this process may have - // created (needed for exec and init processes when - // they join another pid namespace) - p.Wait() - return nil - } - case msg := <-msgC: - switch msg.Type { - case 0: - // close stdin - if p.stdinCloser != nil { - p.stdinCloser.Close() - } - case 1: - // resize - if p.console == nil { - continue - } - ws := term.Winsize{ - Width: uint16(msg.Width), - Height: uint16(msg.Height), - } - term.SetWinsize(p.console.Fd(), &ws) - case 2: - // signal - if err := syscall.Kill(p.pid(), syscall.Signal(msg.Width)); err != nil { - writeMessage(log, "warn", fmt.Errorf("signal pid %d: %s", msg.Width, err)) - } - } - } - } - return nil -} - -func writeInt(path string, i int) error { - f, err := os.Create(path) - if err != nil { - return err - } - defer f.Close() - _, err = fmt.Fprintf(f, "%d", i) - return err -} diff --git a/execution/executors/shim/containerd-shim/process.go b/execution/executors/shim/containerd-shim/process.go deleted file mode 100644 index 952677c..0000000 --- a/execution/executors/shim/containerd-shim/process.go +++ /dev/null @@ -1,369 +0,0 @@ -package main - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strconv" - "sync" - "syscall" - "time" - - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -var errRuntime = errors.New("shim: runtime execution error") - -type checkpoint struct { - // Timestamp is the time that checkpoint happened - Created time.Time `json:"created"` - // Name is the name of the checkpoint - Name string `json:"name"` - // TCP checkpoints open tcp connections - TCP bool `json:"tcp"` - // UnixSockets persists unix sockets in the checkpoint - UnixSockets bool `json:"unixSockets"` - // Shell persists tty sessions in the checkpoint - Shell bool `json:"shell"` - // Exit exits the container after the checkpoint is finished - Exit bool `json:"exit"` - // EmptyNS tells CRIU not to restore a particular namespace - EmptyNS []string `json:"emptyNS,omitempty"` -} - -type processState struct { - specs.Process - Exec bool `json:"exec"` - Stdin string `json:"stdin"` - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - RuntimeArgs []string `json:"runtimeArgs"` - NoPivotRoot bool `json:"noPivotRoot"` - CheckpointPath string `json:"checkpoint"` - RootUID int `json:"rootUID"` - RootGID int `json:"rootGID"` -} - -type process struct { - sync.WaitGroup - id string - bundle string - stdio *stdio - exec bool - containerPid int - checkpoint *checkpoint - checkpointPath string - shimIO *IO - stdinCloser io.Closer - console *os.File - consolePath string - state *processState - runtime string -} - -func newProcess(id, bundle, runtimeName string) (*process, error) { - p := &process{ - id: id, - bundle: bundle, - runtime: runtimeName, - } - s, err := loadProcess() - if err != nil { - return nil, fmt.Errorf("load process from json %s", err) - } - p.state = s - if s.CheckpointPath != "" { - cpt, err := loadCheckpoint(s.CheckpointPath) - if err != nil { - return nil, err - } - p.checkpoint = cpt - p.checkpointPath = s.CheckpointPath - } - if err := p.openIO(); err != nil { - return nil, fmt.Errorf("open IO for container %s", err) - } - return p, nil -} - -func loadProcess() (*processState, error) { - f, err := os.Open("process.json") - if err != nil { - return nil, err - } - defer f.Close() - var s processState - if err := json.NewDecoder(f).Decode(&s); err != nil { - return nil, err - } - return &s, nil -} - -func loadCheckpoint(checkpointPath string) (*checkpoint, error) { - f, err := os.Open(filepath.Join(checkpointPath, "config.json")) - if err != nil { - return nil, err - } - defer f.Close() - var cpt checkpoint - if err := json.NewDecoder(f).Decode(&cpt); err != nil { - return nil, err - } - return &cpt, nil -} - -func (p *process) create() error { - cwd, err := os.Getwd() - if err != nil { - return err - } - logPath := filepath.Join(cwd, "log.json") - args := append([]string{ - "--log", logPath, - "--log-format", "json", - }, p.state.RuntimeArgs...) - if p.state.Exec { - args = append(args, "exec", - "-d", - "--process", filepath.Join(cwd, "process.json"), - "--console", p.consolePath, - ) - } else if p.checkpoint != nil { - args = append(args, "restore", - "-d", - "--image-path", p.checkpointPath, - "--work-path", filepath.Join(p.checkpointPath, "criu.work", "restore-"+time.Now().Format(time.RFC3339)), - ) - add := func(flags ...string) { - args = append(args, flags...) - } - if p.checkpoint.Shell { - add("--shell-job") - } - if p.checkpoint.TCP { - add("--tcp-established") - } - if p.checkpoint.UnixSockets { - add("--ext-unix-sk") - } - if p.state.NoPivotRoot { - add("--no-pivot") - } - for _, ns := range p.checkpoint.EmptyNS { - add("--empty-ns", ns) - } - - } else { - args = append(args, "create", - "--bundle", p.bundle, - "--console", p.consolePath, - ) - if p.state.NoPivotRoot { - args = append(args, "--no-pivot") - } - } - args = append(args, - "--pid-file", filepath.Join(cwd, "pid"), - p.id, - ) - cmd := exec.Command(p.runtime, args...) - cmd.Dir = p.bundle - cmd.Stdin = p.stdio.stdin - cmd.Stdout = p.stdio.stdout - cmd.Stderr = p.stdio.stderr - // Call out to setPDeathSig to set SysProcAttr as elements are platform specific - cmd.SysProcAttr = setPDeathSig() - - if err := cmd.Start(); err != nil { - if exErr, ok := err.(*exec.Error); ok { - if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist { - return fmt.Errorf("%s not installed on system", p.runtime) - } - } - return err - } - p.stdio.stdout.Close() - p.stdio.stderr.Close() - if err := cmd.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); ok { - return errRuntime - } - return err - } - data, err := ioutil.ReadFile("pid") - if err != nil { - return err - } - pid, err := strconv.Atoi(string(data)) - if err != nil { - return err - } - p.containerPid = pid - return nil -} - -func (p *process) pid() int { - return p.containerPid -} - -func (p *process) delete() error { - if !p.state.Exec { - cmd := exec.Command(p.runtime, append(p.state.RuntimeArgs, "delete", p.id)...) - cmd.SysProcAttr = setPDeathSig() - out, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("%s: %v", out, err) - } - } - return nil -} - -// openIO opens the pre-created fifo's for use with the container -// in RDWR so that they remain open if the other side stops listening -func (p *process) openIO() error { - p.stdio = &stdio{} - var ( - uid = p.state.RootUID - gid = p.state.RootGID - ) - go func() { - if stdinCloser, err := os.OpenFile(p.state.Stdin, syscall.O_WRONLY, 0); err == nil { - p.stdinCloser = stdinCloser - } - }() - - if p.state.Terminal { - master, console, err := newConsole(uid, gid) - if err != nil { - return err - } - p.console = master - p.consolePath = console - stdin, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0) - if err != nil { - return err - } - go io.Copy(master, stdin) - stdout, err := os.OpenFile(p.state.Stdout, syscall.O_RDWR, 0) - if err != nil { - return err - } - p.Add(1) - go func() { - io.Copy(stdout, master) - master.Close() - p.Done() - }() - return nil - } - i, err := p.initializeIO(uid) - if err != nil { - return err - } - p.shimIO = i - // non-tty - for name, dest := range map[string]func(f *os.File){ - p.state.Stdout: func(f *os.File) { - p.Add(1) - go func() { - io.Copy(f, i.Stdout) - p.Done() - }() - }, - p.state.Stderr: func(f *os.File) { - p.Add(1) - go func() { - io.Copy(f, i.Stderr) - p.Done() - }() - }, - } { - f, err := os.OpenFile(name, syscall.O_RDWR, 0) - if err != nil { - return err - } - dest(f) - } - - f, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0) - if err != nil { - return err - } - go func() { - io.Copy(i.Stdin, f) - i.Stdin.Close() - }() - - return nil -} - -// IO holds all 3 standard io Reader/Writer (stdin,stdout,stderr) -type IO struct { - Stdin io.WriteCloser - Stdout io.ReadCloser - Stderr io.ReadCloser -} - -func (p *process) initializeIO(rootuid int) (i *IO, err error) { - var fds []uintptr - i = &IO{} - // cleanup in case of an error - defer func() { - if err != nil { - for _, fd := range fds { - syscall.Close(int(fd)) - } - } - }() - // STDIN - r, w, err := os.Pipe() - if err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.stdio.stdin, i.Stdin = r, w - // STDOUT - if r, w, err = os.Pipe(); err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.stdio.stdout, i.Stdout = w, r - // STDERR - if r, w, err = os.Pipe(); err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.stdio.stderr, i.Stderr = w, r - // change ownership of the pipes in case we are in a user namespace - for _, fd := range fds { - if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil { - return nil, err - } - } - return i, nil -} -func (p *process) Close() error { - return p.stdio.Close() -} - -type stdio struct { - stdin *os.File - stdout *os.File - stderr *os.File -} - -func (s *stdio) Close() error { - err := s.stdin.Close() - if oerr := s.stdout.Close(); err == nil { - err = oerr - } - if oerr := s.stderr.Close(); err == nil { - err = oerr - } - return err -} diff --git a/execution/executors/shim/containerd-shim/process_pdeathsig.go b/execution/executors/shim/containerd-shim/process_pdeathsig.go deleted file mode 100644 index ef91429..0000000 --- a/execution/executors/shim/containerd-shim/process_pdeathsig.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build !solaris - -package main - -import ( - "syscall" -) - -// setPDeathSig sets the parent death signal to SIGKILL so that if the -// shim dies the container process also dies. -func setPDeathSig() *syscall.SysProcAttr { - return &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGKILL, - } -} diff --git a/execution/executors/shim/containerd-shim/process_pdeathsig_unsupported.go b/execution/executors/shim/containerd-shim/process_pdeathsig_unsupported.go deleted file mode 100644 index 08debc8..0000000 --- a/execution/executors/shim/containerd-shim/process_pdeathsig_unsupported.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build solaris - -package main - -import ( - "syscall" -) - -// setPDeathSig is a no-op on Solaris as Pdeathsig is not defined. -func setPDeathSig() *syscall.SysProcAttr { - return nil -} diff --git a/execution/executors/shim/process.go b/execution/executors/shim/process.go deleted file mode 100644 index a0066c7..0000000 --- a/execution/executors/shim/process.go +++ /dev/null @@ -1,543 +0,0 @@ -package shim - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "sync" - "syscall" - "time" - - "github.com/Sirupsen/logrus" - "github.com/docker/containerd" - specs "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" -) - -var ( - ErrContainerStartTimeout = errors.New("shim: container did not start before the specified timeout") - ErrContainerNotStarted = errors.New("shim: container not started") - ErrProcessNotExited = errors.New("containerd: process has not exited") - ErrShimExited = errors.New("containerd: shim exited before container process was started") - errInvalidPidInt = errors.New("shim: process pid is invalid") -) - -const UnknownStatus = 255 - -type processOpts struct { - root string - noPivotRoot bool - checkpoint string - c *containerd.Container - cmd *exec.Cmd - exec bool - spec specs.Process - stdin io.Reader - stdout io.Writer - stderr io.Writer -} - -func newProcess(opts processOpts) (*process, error) { - var ( - spec = opts.c.Spec() - stdin, stdout, stderr string - ) - uid, gid, err := getRootIDs(spec) - if err != nil { - return nil, err - } - for _, t := range []struct { - path *string - v interface{} - }{ - { - path: &stdin, - v: opts.stdin, - }, - { - path: &stdout, - v: opts.stdout, - }, - { - path: &stderr, - v: opts.stderr, - }, - } { - p, err := getFifoPath(t.v) - if err != nil { - return nil, err - } - *t.path = p - } - p := &process{ - root: opts.root, - cmd: opts.cmd, - done: make(chan struct{}), - spec: opts.spec, - exec: opts.exec, - rootUid: uid, - rootGid: gid, - noPivotRoot: opts.noPivotRoot, - checkpoint: opts.checkpoint, - stdin: stdin, - stdout: stdout, - stderr: stderr, - } - f, err := os.Create(filepath.Join(opts.root, "process.json")) - if err != nil { - return nil, err - } - err = json.NewEncoder(f).Encode(p) - f.Close() - if err != nil { - return nil, err - } - exit, err := getExitPipe(filepath.Join(opts.root, "exit")) - if err != nil { - return nil, err - } - control, err := getControlPipe(filepath.Join(opts.root, "control")) - if err != nil { - return nil, err - } - p.exit, p.control = exit, control - return p, nil -} - -type process struct { - root string - cmd *exec.Cmd - done chan struct{} - success bool - startTime string - mu sync.Mutex - pid int - exit *os.File - control *os.File - - spec specs.Process - noPivotRoot bool - exec bool - rootUid int - rootGid int - checkpoint string - stdin string - stdout string - stderr string -} - -type processState struct { - specs.Process - Exec bool `json:"exec"` - RootUID int `json:"rootUID"` - RootGID int `json:"rootGID"` - Checkpoint string `json:"checkpoint"` - NoPivotRoot bool `json:"noPivotRoot"` - RuntimeArgs []string `json:"runtimeArgs"` - Root string `json:"root"` - StartTime string `json:"startTime"` - // Stdin fifo filepath - Stdin string `json:"stdin"` - // Stdout fifo filepath - Stdout string `json:"stdout"` - // Stderr fifo filepath - Stderr string `json:"stderr"` -} - -func (p *process) MarshalJSON() ([]byte, error) { - ps := processState{ - Process: p.spec, - NoPivotRoot: p.noPivotRoot, - Checkpoint: p.checkpoint, - RootUID: p.rootUid, - RootGID: p.rootGid, - Exec: p.exec, - Stdin: p.stdin, - Stdout: p.stdout, - Stderr: p.stderr, - Root: p.root, - StartTime: p.startTime, - } - return json.Marshal(ps) -} - -func (p *process) UnmarshalJSON(b []byte) error { - var ps processState - if err := json.Unmarshal(b, &ps); err != nil { - return err - } - p.spec = ps.Process - p.noPivotRoot = ps.NoPivotRoot - p.rootGid = ps.RootGID - p.rootUid = ps.RootUID - p.checkpoint = ps.Checkpoint - p.exec = ps.Exec - p.stdin = ps.Stdin - p.stdout = ps.Stdout - p.stderr = ps.Stderr - p.root = ps.Root - p.startTime = ps.StartTime - p.done = make(chan struct{}) - pid, err := readPid(filepath.Join(p.root, "pid")) - if err != nil { - return err - } - p.pid = pid - exit, err := getExitPipe(filepath.Join(p.root, "exit")) - if err != nil { - return err - } - control, err := getControlPipe(filepath.Join(p.root, "control")) - if err != nil { - return err - } - p.exit, p.control = exit, control - return nil -} - -func (p *process) Pid() int { - return p.pid -} - -func (p *process) FD() int { - return int(p.exit.Fd()) -} - -func (p *process) Close() error { - return p.exit.Close() -} - -func (p *process) Remove() bool { - return true -} - -func (p *process) Wait() (rst uint32, rerr error) { - <-p.done - data, err := ioutil.ReadFile(filepath.Join(p.root, "exitStatus")) - defer func() { - if rerr != nil { - rst, rerr = p.handleSigkilledShim(rst, rerr) - } - }() - if err != nil { - if os.IsNotExist(err) { - return UnknownStatus, ErrProcessNotExited - } - return UnknownStatus, err - } - if len(data) == 0 { - return UnknownStatus, ErrProcessNotExited - } - i, err := strconv.ParseUint(string(data), 10, 32) - return uint32(i), err -} - -func (p *process) Signal(s os.Signal) error { - _, err := fmt.Fprintf(p.control, "%d %d %d\n", 2, s, 0) - return err -} - -// same checks if the process is the same process originally launched -func (p *process) same() (bool, error) { - /// for backwards compat assume true if it is not set - p.mu.Lock() - defer p.mu.Unlock() - if p.startTime == "" { - return true, nil - } - pid, err := readPid(filepath.Join(p.root, "pid")) - if err != nil { - return false, nil - } - started, err := readProcessStartTime(pid) - if err != nil { - return false, err - } - return p.startTime == started, nil -} - -func (p *process) checkExited() { - err := p.cmd.Wait() - if err == nil { - p.mu.Lock() - if p.success { - p.mu.Unlock() - return - } - p.success = true - p.mu.Unlock() - } - if same, _ := p.same(); same && p.hasPid() { - // The process changed its PR_SET_PDEATHSIG, so force kill it - logrus.Infof("containerd: (pid %v) has become an orphan, killing it", p.pid) - if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { - logrus.Errorf("containerd: unable to SIGKILL (pid %v): %v", p.pid, err) - return - } - // wait for the container process to exit - for { - if err := unix.Kill(p.pid, 0); err != nil { - break - } - time.Sleep(5 * time.Millisecond) - } - } -} - -func (p *process) hasPid() bool { - p.mu.Lock() - r := p.pid > 0 - p.mu.Unlock() - return r -} - -type pidResponse struct { - pid int - err error -} - -func (p *process) waitForCreate(timeout time.Duration) error { - r := make(chan pidResponse, 1) - go p.readContainerPid(r) - - select { - case resp := <-r: - if resp.err != nil { - return resp.err - } - p.mu.Lock() - p.pid = resp.pid - started, err := readProcessStartTime(resp.pid) - if err != nil { - if os.IsNotExist(err) { - // process already exited - p.success = true - p.mu.Unlock() - return nil - } - logrus.Warnf("shim: unable to save starttime: %v", err) - } - p.startTime = started - f, err := os.Create(filepath.Join(p.root, "process.json")) - if err != nil { - logrus.Warnf("shim: unable to create process.json: %v", err) - p.mu.Unlock() - return nil - } - defer f.Close() - if err := json.NewEncoder(f).Encode(p); err != nil { - logrus.Warnf("shim: unable to encode process: %v", err) - } - p.mu.Unlock() - return nil - case <-time.After(timeout): - p.cmd.Process.Kill() - p.cmd.Wait() - return ErrContainerStartTimeout - } -} - -func (p *process) readContainerPid(r chan pidResponse) { - pidFile := filepath.Join(p.root, "pid") - for { - pid, err := readPid(pidFile) - if err != nil { - if os.IsNotExist(err) || err == errInvalidPidInt { - if serr := checkErrorLogs(p.cmd, - filepath.Join(p.root, "shim-log.json"), - filepath.Join(p.root, "log.json")); serr != nil && !os.IsNotExist(serr) { - r <- pidResponse{ - err: serr, - } - break - } - time.Sleep(15 * time.Millisecond) - continue - } - r <- pidResponse{ - err: err, - } - break - } - r <- pidResponse{ - pid: pid, - } - break - } -} - -func (p *process) handleSigkilledShim(rst uint32, rerr error) (uint32, error) { - if err := unix.Kill(p.pid, 0); err == syscall.ESRCH { - logrus.Warnf("containerd: (pid %d) does not exist", p.pid) - // The process died while containerd was down (probably of - // SIGKILL, but no way to be sure) - return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus) - } - - // If it's not the same process, just mark it stopped and set - // the status to the UnknownStatus value (i.e. 255) - if same, _ := p.same(); !same { - // Create the file so we get the exit event generated once monitor kicks in - // without having to go through all this process again - return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus) - } - ppid, err := readProcStatField(p.pid, 4) - if err != nil { - return rst, fmt.Errorf("could not check process ppid: %v (%v)", err, rerr) - } - if ppid == "1" { - if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { - return UnknownStatus, fmt.Errorf( - "containerd: unable to SIGKILL (pid %v): %v", p.pid, err) - } - // wait for the process to die - for { - if err := unix.Kill(p.pid, 0); err == syscall.ESRCH { - break - } - time.Sleep(5 * time.Millisecond) - } - // Create the file so we get the exit event generated once monitor kicks in - // without having to go through all this process again - status := 128 + uint32(syscall.SIGKILL) - return status, writeExitStatus(filepath.Join(p.root, "exitStatus"), status) - } - return rst, rerr -} - -func checkErrorLogs(cmd *exec.Cmd, shimLogPath, runtimeLogPath string) error { - alive, err := isAlive(cmd) - if err != nil { - return err - } - if !alive { - // runc could have failed to run the container so lets get the error - // out of the logs or the shim could have encountered an error - messages, err := readLogMessages(shimLogPath) - if err != nil { - return err - } - for _, m := range messages { - if m.Level == "error" { - return fmt.Errorf("shim error: %v", m.Msg) - } - } - // no errors reported back from shim, check for runc/runtime errors - messages, err = readLogMessages(runtimeLogPath) - if err != nil { - if os.IsNotExist(err) { - err = ErrContainerNotStarted - } - return err - } - for _, m := range messages { - if m.Level == "error" { - return fmt.Errorf("oci runtime error: %v", m.Msg) - } - } - return ErrContainerNotStarted - } - return nil -} - -func readProcessStartTime(pid int) (string, error) { - return readProcStatField(pid, 22) -} - -func readProcStatField(pid int, field int) (string, error) { - data, err := ioutil.ReadFile(filepath.Join(string(filepath.Separator), "proc", strconv.Itoa(pid), "stat")) - if err != nil { - return "", err - } - if field > 2 { - // First, split out the name since he could contains spaces. - parts := strings.Split(string(data), ") ") - // Now split out the rest, we end up with 2 fields less - parts = strings.Split(parts[1], " ") - return parts[field-2-1], nil // field count start at 1 in manual - } - parts := strings.Split(string(data), " (") - if field == 1 { - return parts[0], nil - } - return strings.Split(parts[1], ") ")[0], nil -} - -func readPid(pidFile string) (int, error) { - data, err := ioutil.ReadFile(pidFile) - if err != nil { - return -1, err - } - i, err := strconv.Atoi(string(data)) - if err != nil { - return -1, errInvalidPidInt - } - return i, nil -} - -// isAlive checks if the shim that launched the container is still alive -func isAlive(cmd *exec.Cmd) (bool, error) { - if _, err := syscall.Wait4(cmd.Process.Pid, nil, syscall.WNOHANG, nil); err == nil { - return true, nil - } - if err := syscall.Kill(cmd.Process.Pid, 0); err != nil { - if err == syscall.ESRCH { - return false, nil - } - return false, err - } - return true, nil -} - -type message struct { - Level string `json:"level"` - Msg string `json:"msg"` -} - -func readLogMessages(path string) ([]message, error) { - var out []message - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - dec := json.NewDecoder(f) - for { - var m message - if err := dec.Decode(&m); err != nil { - if err == io.EOF { - break - } - return nil, err - } - out = append(out, m) - } - return out, nil -} - -func getExitPipe(path string) (*os.File, error) { - if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - // add NONBLOCK in case the other side has already closed or else - // this function would never return - return os.OpenFile(path, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) -} - -func getControlPipe(path string) (*os.File, error) { - if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - return os.OpenFile(path, syscall.O_RDWR|syscall.O_NONBLOCK, 0) -} - -func writeExitStatus(path string, status uint32) error { - return ioutil.WriteFile(path, []byte(fmt.Sprintf("%u", status)), 0644) -} diff --git a/execution/executors/shim/shim.go b/execution/executors/shim/shim.go deleted file mode 100644 index 5c7862a..0000000 --- a/execution/executors/shim/shim.go +++ /dev/null @@ -1,430 +0,0 @@ -package shim - -import ( - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/docker/containerd" - "github.com/docker/containerd/executors/oci" - "github.com/docker/containerd/monitor" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -/* -├── libcontainerd -│   ├── containerd -│   │   └── ff2e86955c2be43f0e3c300fbd3786599301bd8efcaa5a386587f132e73af242 -│   │   ├── init -│   │   │   ├── control -│   │   │   ├── exit -│   │   │   ├── log.json -│   │   │   ├── pid -│   │   │   ├── process.json -│   │   │   ├── shim-log.json -│   │   │   └── starttime -│   │   └── state.json -*/ - -var ( - ErrNotFifo = errors.New("shim: IO is not a valid fifo on disk") - errInitProcessNotExist = errors.New("shim: init process does not exist") -) - -type Opts struct { - Name string - RuntimeName string - RuntimeArgs []string - RuntimeRoot string - NoPivotRoot bool - Root string - Timeout time.Duration -} - -func New(opts Opts) (*Shim, error) { - if err := os.MkdirAll(filepath.Dir(opts.Root), 0711); err != nil { - return nil, err - } - if err := os.Mkdir(opts.Root, 0711); err != nil { - return nil, err - } - r, err := oci.New(oci.Opts{ - Root: opts.RuntimeRoot, - Name: opts.RuntimeName, - Args: opts.RuntimeArgs, - }) - if err != nil { - return nil, err - } - m, err := monitor.New() - if err != nil { - return nil, err - } - s := &Shim{ - root: opts.Root, - name: opts.Name, - timeout: opts.Timeout, - runtime: r, - processes: make(map[string]*process), - m: m, - } - go s.startMonitor() - f, err := os.Create(filepath.Join(opts.Root, "state.json")) - if err != nil { - return nil, err - } - err = json.NewEncoder(f).Encode(s) - f.Close() - return s, err -} - -// Load will load an existing shim with all its information restored from the -// provided path -func Load(root string) (*Shim, error) { - f, err := os.Open(filepath.Join(root, "state.json")) - if err != nil { - return nil, err - } - var s Shim - err = json.NewDecoder(f).Decode(&s) - f.Close() - if err != nil { - return nil, err - } - m, err := monitor.New() - if err != nil { - return nil, err - } - s.m = m - go s.startMonitor() - dirs, err := ioutil.ReadDir(root) - if err != nil { - return nil, err - } - for _, d := range dirs { - if !d.IsDir() { - continue - } - name := d.Name() - if f, err = os.Open(filepath.Join(root, name, "process.json")); err != nil { - return nil, err - } - var p process - err = json.NewDecoder(f).Decode(&p) - f.Close() - if err != nil { - return nil, err - } - s.processes[name] = &p - if err := s.m.Add(&p); err != nil { - return nil, err - } - } - return &s, nil -} - -// Shim is a container runtime that adds a shim process as the container's parent -// to hold open stdio and other resources so that higher level daemons can exit and -// load running containers for handling upgrades and/or crashes -// -// The shim uses an OCI compliant runtime as its executor -type Shim struct { - // root holds runtime state information for the containers - // launched by the runtime - root string - name string - timeout time.Duration - noPivotRoot bool - runtime *oci.OCIRuntime - pmu sync.Mutex - processes map[string]*process - bundle string - checkpoint string - m *monitor.Monitor -} - -type state struct { - Root string `json:"root"` - // Bundle is the path to the container's bundle - Bundle string `json:"bundle"` - // OCI runtime binary name - Runtime string `json:"runtime"` - // OCI runtime args - RuntimeArgs []string `json:"runtimeArgs"` - RuntimeRoot string `json:"runtimeRoot"` - // Shim binary name - Name string `json:"shim"` - /// NoPivotRoot option - NoPivotRoot bool `json:"noPivotRoot"` - // Timeout for container start - Timeout time.Duration `json:"timeout"` -} - -func (s *Shim) MarshalJSON() ([]byte, error) { - st := state{ - Name: s.name, - Bundle: s.bundle, - Runtime: s.runtime.Name(), - RuntimeArgs: s.runtime.Args(), - RuntimeRoot: s.runtime.Root(), - NoPivotRoot: s.noPivotRoot, - Timeout: s.timeout, - Root: s.root, - } - return json.Marshal(st) -} - -func (s *Shim) UnmarshalJSON(b []byte) error { - var st state - if err := json.Unmarshal(b, &st); err != nil { - return err - } - s.root = st.Root - s.name = st.Name - s.bundle = st.Bundle - s.timeout = st.Timeout - s.noPivotRoot = st.NoPivotRoot - r, err := oci.New(oci.Opts{ - Name: st.Runtime, - Args: st.RuntimeArgs, - Root: st.RuntimeRoot, - }) - if err != nil { - return err - } - s.runtime = r - s.processes = make(map[string]*process) - return nil -} - -func (s *Shim) Create(c *containerd.Container) (containerd.ProcessDelegate, error) { - s.bundle = c.Path() - var ( - root = filepath.Join(s.root, "init") - cmd = s.command(c.ID(), c.Path(), s.runtime.Name()) - ) - if err := os.Mkdir(root, 0711); err != nil { - return nil, err - } - // exec the shim inside the state directory setup with the process - // information for what is being run - cmd.Dir = root - // make sure the shim is in a new process group - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - p, err := s.startCommand(processOpts{ - spec: c.Spec().Process, - root: root, - noPivotRoot: s.noPivotRoot, - checkpoint: s.checkpoint, - c: c, - cmd: cmd, - stdin: c.Stdin, - stdout: c.Stdout, - stderr: c.Stderr, - }) - if err != nil { - return nil, err - } - s.pmu.Lock() - s.processes["init"] = p - s.pmu.Unlock() - f, err := os.Create(filepath.Join(s.root, "state.json")) - if err != nil { - return nil, err - } - err = json.NewEncoder(f).Encode(s) - f.Close() - // ~TODO: oom and stats stuff here - return p, err -} - -func (s *Shim) Start(c *containerd.Container) error { - p, err := s.getContainerInit() - if err != nil { - return err - } - var ( - errC = make(chan error, 1) - cmd = s.runtime.Command("start", c.ID()) - ) - go func() { - out, err := cmd.CombinedOutput() - if err != nil { - errC <- fmt.Errorf("%s: %q", err, out) - } - errC <- nil - }() - select { - case err := <-errC: - if err != nil { - return err - } - case <-p.done: - if !p.success { - if cmd.Process != nil { - cmd.Process.Kill() - } - cmd.Wait() - return ErrShimExited - } - err := <-errC - if err != nil { - return err - } - } - return nil -} - -func (s *Shim) Delete(c *containerd.Container) error { - if err := s.runtime.Delete(c); err != nil { - return err - } - return os.RemoveAll(s.root) -} - -func (s *Shim) Exec(c *containerd.Container, p *containerd.Process) (containerd.ProcessDelegate, error) { - root, err := ioutil.TempDir(s.root, "") - if err != nil { - return nil, err - } - cmd := s.command(c.ID(), c.Path(), s.runtime.Name()) - // exec the shim inside the state directory setup with the process - // information for what is being run - cmd.Dir = root - // make sure the shim is in a new process group - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - sp, err := s.startCommand(processOpts{ - exec: true, - spec: *p.Spec(), - root: root, - noPivotRoot: s.noPivotRoot, - checkpoint: s.checkpoint, - c: c, - cmd: cmd, - stdin: p.Stdin, - stdout: p.Stdout, - stderr: p.Stderr, - }) - if err != nil { - return nil, err - } - s.pmu.Lock() - s.processes[filepath.Base(root)] = sp - s.pmu.Unlock() - return sp, nil -} - -func (s *Shim) Load(id string) (containerd.ProcessDelegate, error) { - return s.getContainerInit() -} - -func (s *Shim) getContainerInit() (*process, error) { - s.pmu.Lock() - p, ok := s.processes["init"] - s.pmu.Unlock() - if !ok { - return nil, errInitProcessNotExist - } - return p, nil -} - -func (s *Shim) startCommand(opts processOpts) (*process, error) { - p, err := newProcess(opts) - if err != nil { - return nil, err - } - if err := s.m.Add(p); err != nil { - return nil, err - } - if err := opts.cmd.Start(); err != nil { - close(p.done) - if checkShimNotFound(err) { - return nil, fmt.Errorf("%s not install on system", s.name) - } - return nil, err - } - // make sure it does not die before we get the container's pid - defer func() { - go p.checkExited() - }() - if err := p.waitForCreate(s.timeout); err != nil { - return nil, err - } - return p, nil -} - -func (s *Shim) command(args ...string) *exec.Cmd { - return exec.Command(s.name, args...) -} - -func (s *Shim) startMonitor() { - go s.m.Run() - defer s.m.Close() - for m := range s.m.Events() { - p := m.(*process) - close(p.done) - } -} - -// checkShimNotFound checks the error returned from a exec call to see if the binary -// that was called exists on the system and returns true if the shim binary does not exist -func checkShimNotFound(err error) bool { - if exitError, ok := err.(*exec.Error); ok { - e := exitError.Err - return e == exec.ErrNotFound || e == os.ErrNotExist - } - return false -} - -// getFifoPath returns the path to the fifo on disk as long as the provided -// interface is an *os.File and has a valid path on the Name() method call -func getFifoPath(v interface{}) (string, error) { - f, ok := v.(*os.File) - if !ok { - return "", ErrNotFifo - } - p := f.Name() - if p == "" { - return "", ErrNotFifo - } - return p, nil -} - -func getRootIDs(s *specs.Spec) (int, int, error) { - if s == nil { - return 0, 0, nil - } - var hasUserns bool - for _, ns := range s.Linux.Namespaces { - if ns.Type == specs.UserNamespace { - hasUserns = true - break - } - } - if !hasUserns { - return 0, 0, nil - } - uid := hostIDFromMap(0, s.Linux.UIDMappings) - gid := hostIDFromMap(0, s.Linux.GIDMappings) - return uid, gid, nil -} - -func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int { - for _, m := range mp { - if (id >= m.ContainerID) && (id <= (m.ContainerID + m.Size - 1)) { - return int(m.HostID + (id - m.ContainerID)) - } - } - return 0 -}