Delete shim

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2016-12-05 15:38:51 -08:00
parent 21a53c1d70
commit e31a99c08a
23 changed files with 0 additions and 1608 deletions

View file

@ -1,56 +0,0 @@
// +build !solaris
package main
import (
"fmt"
"os"
"syscall"
"unsafe"
)
// newConsole allocates a pseudoterminal for use within a container.
//
// It opens /dev/ptmx, resolves the path of the matching slave device, unlocks
// the slave, and restricts the slave to mode 0600 owned by uid:gid. The open
// master side is returned together with the slave path; the caller copies
// bytes between the master and the slave that is attached as the tty of the
// container's init process.
//
// On any failure after the master has been opened, the master is closed
// before returning so its descriptor is not leaked.
func newConsole(uid, gid int) (*os.File, string, error) {
	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
	if err != nil {
		return nil, "", err
	}
	// fail releases the master before reporting err to the caller.
	fail := func(err error) (*os.File, string, error) {
		master.Close()
		return nil, "", err
	}
	console, err := ptsname(master)
	if err != nil {
		return fail(err)
	}
	if err := unlockpt(master); err != nil {
		return fail(err)
	}
	if err := os.Chmod(console, 0600); err != nil {
		return fail(err)
	}
	if err := os.Chown(console, uid, gid); err != nil {
		return fail(err)
	}
	return master, console, nil
}
// ioctl issues the ioctl system call with request flag and argument data
// against fd. A zero errno is reported as a nil error; any other errno is
// returned as the error.
func ioctl(fd uintptr, flag, data uintptr) error {
	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data)
	if errno == 0 {
		return nil
	}
	return errno
}
// unlockpt releases the lock on the slave pseudoterminal that belongs to the
// master referred to by f. It must run before the slave side of the pty is
// opened.
func unlockpt(f *os.File) error {
	var lock int32 // left at zero for the TIOCSPTLCK request
	return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&lock)))
}
// ptsname asks the master f for its pts number via TIOCGPTN and returns the
// corresponding path under /dev/pts.
func ptsname(f *os.File) (string, error) {
	var index int32
	err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&index)))
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("/dev/pts/%d", index), nil
}

View file

@ -1,14 +0,0 @@
// +build solaris
package main
import (
"errors"
"os"
)
// newConsole is the Solaris stand-in for console allocation. It never creates
// a console: it always returns a nil file, an empty path, and an error stating
// that the operation is not implemented.
func newConsole(uid, gid int) (*os.File, string, error) {
	err := errors.New("newConsole not implemented on Solaris")
	return nil, "", err
}

View file

@ -1,169 +0,0 @@
package main
import (
	"encoding/json"
	"flag"
	"fmt"
	"os"
	"os/signal"
	"path/filepath"
	"syscall"

	"github.com/docker/containerd/sys"
	"github.com/docker/docker/pkg/term"
)
// writeMessage appends a single JSON log record of the form
// {"level": ..., "msg": ...} to f.
//
// Both values are JSON-encoded so that quotes, backslashes, or newlines in the
// error text cannot produce a record that a JSON decoder rejects. Write errors
// are ignored: the log file is the only place left to report problems.
func writeMessage(f *os.File, level string, err error) {
	// Marshalling a string cannot fail, so the errors are safe to drop.
	lvl, _ := json.Marshal(level)
	msg, _ := json.Marshal(fmt.Sprintf("%s", err))
	fmt.Fprintf(f, `{"level": %s,"msg": %s}`, lvl, msg)
}
// controlMessage is one request read from the "control" fifo as three
// integers. Type selects the action taken by the shim's control loop:
// 0 closes the container's stdin, 1 resizes the console to Width x Height,
// and 2 sends the signal numbered Width to the container process.
type controlMessage struct {
Type int
Width int
Height int
}
// containerd-shim is a small shim that sits in front of a runtime
// implementation so that it can be reparented to init and handle reattach
// from the caller.
//
// The cwd of the shim must be the state directory where the shim can locate
// its fifos and other information.
// Arg0: id of the container
// Arg1: bundle path
// Arg2: runtime binary
func main() {
	flag.Parse()
	wd, err := os.Getwd()
	if err != nil {
		panic(err)
	}
	logFile, err := os.OpenFile(filepath.Join(wd, "shim-log.json"), os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666)
	if err != nil {
		panic(err)
	}
	switch err := start(logFile); err {
	case nil:
		return
	case errRuntime:
		// The runtime failed to start the container and has already written
		// the proper error to the runtime log. This is not a shim failure,
		// because the shim itself executed properly.
		logFile.Close()
		return
	default:
		// Log the error instead of writing to stderr: the shim has /dev/null
		// as its stdio because it is supposed to be reparented to system
		// init, so nobody would read from it.
		writeMessage(logFile, "error", err)
		logFile.Close()
		os.Exit(1)
	}
}
// start runs the shim for one container process and returns once the runtime
// process has exited, or earlier when setup fails.
//
// It subscribes to all signals, marks the shim as subreaper, opens the "exit"
// and "control" fifos relative to the current directory, and creates the
// process through the runtime. It then serves two event sources until exit:
//
//   - SIGCHLD: exited children are reaped; when the runtime process is among
//     them its status is written to the "exitStatus" file, the exit fifo is
//     closed, and the function returns after waiting for the IO copiers.
//   - control messages: type 0 closes stdin, type 1 resizes the console, and
//     type 2 signals the container process (signal number carried in Width).
//
// Non-fatal problems are written to log as "warn" records.
func start(log *os.File) error {
	// start handling signals as soon as possible so that things are properly reaped
	// or if runtime exits before we hit the handler
	signals := make(chan os.Signal, 2048)
	signal.Notify(signals)
	// set the shim as the subreaper for all orphaned processes created by the container
	if err := sys.SetSubreaper(1); err != nil {
		return err
	}
	// open the exit pipe
	f, err := os.OpenFile("exit", syscall.O_WRONLY, 0)
	if err != nil {
		return fmt.Errorf("open exit fifo %s", err)
	}
	defer f.Close()
	control, err := os.OpenFile("control", syscall.O_RDWR, 0)
	if err != nil {
		return fmt.Errorf("open control fifo %s", err)
	}
	defer control.Close()
	p, err := newProcess(flag.Arg(0), flag.Arg(1), flag.Arg(2))
	if err != nil {
		return err
	}
	defer func() {
		if err := p.Close(); err != nil {
			writeMessage(log, "warn", fmt.Errorf("close stdio %s", err))
		}
	}()
	if err := p.create(); err != nil {
		p.delete()
		return err
	}
	msgC := make(chan controlMessage, 32)
	go func() {
		for {
			var m controlMessage
			// Lines that do not parse as three integers are skipped.
			if _, err := fmt.Fscanf(control, "%d %d %d\n", &m.Type, &m.Width, &m.Height); err != nil {
				continue
			}
			msgC <- m
		}
	}()
	var exitShim bool
	for {
		select {
		case s := <-signals:
			switch s {
			case syscall.SIGCHLD:
				exits, _ := sys.Reap(false)
				for _, e := range exits {
					// check to see if runtime is one of the processes that has exited
					if e.Pid == p.pid() {
						exitShim = true
						if err := writeInt("exitStatus", e.Status); err != nil {
							writeMessage(log, "warn", fmt.Errorf("write exit status %s", err))
						}
					}
				}
			}
			// runtime has exited so the shim can also exit
			if exitShim {
				// Let containerd take care of calling the runtime
				// delete.
				// This is needed to be done first in order to ensure
				// that the call to Reap does not block until all
				// children of the container have died if init was not
				// started in its own PID namespace.
				f.Close()
				// Wait for all the children this process may have
				// created (needed for exec and init processes when
				// they join another pid namespace)
				p.Wait()
				return nil
			}
		case msg := <-msgC:
			switch msg.Type {
			case 0:
				// close stdin
				if p.stdinCloser != nil {
					p.stdinCloser.Close()
				}
			case 1:
				// resize
				if p.console == nil {
					continue
				}
				ws := term.Winsize{
					Width:  uint16(msg.Width),
					Height: uint16(msg.Height),
				}
				term.SetWinsize(p.console.Fd(), &ws)
			case 2:
				// signal: Width carries the signal number for this message type
				if err := syscall.Kill(p.pid(), syscall.Signal(msg.Width)); err != nil {
					writeMessage(log, "warn", fmt.Errorf("signal %d to pid %d: %s", msg.Width, p.pid(), err))
				}
			}
		}
	}
}
// writeInt creates (or truncates) the file at path and stores i in it as
// decimal text without a trailing newline.
func writeInt(path string, i int) error {
	out, err := os.Create(path)
	if err != nil {
		return err
	}
	defer out.Close()
	if _, werr := fmt.Fprintf(out, "%d", i); werr != nil {
		return werr
	}
	return nil
}

View file

@ -1,369 +0,0 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// errRuntime is returned by create when waiting on the runtime command yields
// an *exec.ExitError; main then exits without logging a shim error.
var errRuntime = errors.New("shim: runtime execution error")
// checkpoint describes a container checkpoint as decoded from the
// config.json file inside the checkpoint directory.
type checkpoint struct {
// Created is the time that checkpoint happened
Created time.Time `json:"created"`
// Name is the name of the checkpoint
Name string `json:"name"`
// TCP checkpoints open tcp connections
TCP bool `json:"tcp"`
// UnixSockets persists unix sockets in the checkpoint
UnixSockets bool `json:"unixSockets"`
// Shell persists tty sessions in the checkpoint
Shell bool `json:"shell"`
// Exit exits the container after the checkpoint is finished
Exit bool `json:"exit"`
// EmptyNS tells CRIU not to restore a particular namespace
EmptyNS []string `json:"emptyNS,omitempty"`
}
// processState is the description of the process to run, decoded from the
// process.json file in the shim's working directory.
type processState struct {
specs.Process
// Exec selects the runtime "exec" subcommand instead of create/restore.
Exec bool `json:"exec"`
// Stdin, Stdout and Stderr are the paths opened by openIO for the
// process's standard streams.
Stdin string `json:"stdin"`
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`
// RuntimeArgs are extra arguments placed before the runtime subcommand.
RuntimeArgs []string `json:"runtimeArgs"`
// NoPivotRoot adds --no-pivot to the runtime invocation.
NoPivotRoot bool `json:"noPivotRoot"`
// CheckpointPath, when non-empty, is the checkpoint directory to restore from.
CheckpointPath string `json:"checkpoint"`
// RootUID and RootGID are passed to newConsole as the owner of the console.
RootUID int `json:"rootUID"`
RootGID int `json:"rootGID"`
}
// process is the shim's view of the single runtime process it manages.
type process struct {
// WaitGroup tracks the goroutines copying the process's output streams.
sync.WaitGroup
// id, bundle and runtime come from the shim's command line arguments.
id string
bundle string
// stdio holds the files handed to the runtime command as stdin/stdout/stderr.
stdio *stdio
exec bool
// containerPid is the pid read from the "pid" file after the runtime returns.
containerPid int
checkpoint *checkpoint
checkpointPath string
// shimIO holds the shim's ends of the stdio pipes (non-terminal mode).
shimIO *IO
// stdinCloser is the write side of the stdin path, closed on control message 0.
stdinCloser io.Closer
// console is the pty master and consolePath the slave path (terminal mode).
console *os.File
consolePath string
state *processState
runtime string
}
// newProcess builds the process for container id from the process.json state
// in the current directory. When the state names a checkpoint, its
// configuration is loaded as well. The process's IO is opened before the
// process is returned.
func newProcess(id, bundle, runtimeName string) (*process, error) {
	state, err := loadProcess()
	if err != nil {
		return nil, fmt.Errorf("load process from json %s", err)
	}
	proc := &process{
		id:      id,
		bundle:  bundle,
		runtime: runtimeName,
		state:   state,
	}
	if path := state.CheckpointPath; path != "" {
		cpt, err := loadCheckpoint(path)
		if err != nil {
			return nil, err
		}
		proc.checkpoint, proc.checkpointPath = cpt, path
	}
	if err := proc.openIO(); err != nil {
		return nil, fmt.Errorf("open IO for container %s", err)
	}
	return proc, nil
}
// loadProcess decodes process.json from the current directory into a
// processState.
func loadProcess() (*processState, error) {
	file, err := os.Open("process.json")
	if err != nil {
		return nil, err
	}
	defer file.Close()
	state := new(processState)
	if err := json.NewDecoder(file).Decode(state); err != nil {
		return nil, err
	}
	return state, nil
}
// loadCheckpoint decodes the config.json found in checkpointPath into a
// checkpoint.
func loadCheckpoint(checkpointPath string) (*checkpoint, error) {
	configPath := filepath.Join(checkpointPath, "config.json")
	file, err := os.Open(configPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	config := new(checkpoint)
	if err := json.NewDecoder(file).Decode(config); err != nil {
		return nil, err
	}
	return config, nil
}
// create runs the runtime binary to exec, restore, or create the process and
// records the resulting container pid.
//
// The subcommand is chosen from the process state: "exec" when state.Exec is
// set, "restore" when a checkpoint was loaded, and "create" otherwise. The
// runtime is told to log JSON to log.json and to write the pid to the "pid"
// file, both in the current directory. It returns errRuntime when the runtime
// exits unsuccessfully, and a "not installed" error when the binary is missing.
func (p *process) create() error {
cwd, err := os.Getwd()
if err != nil {
return err
}
logPath := filepath.Join(cwd, "log.json")
// global runtime flags come first, followed by caller-supplied runtime args
args := append([]string{
"--log", logPath,
"--log-format", "json",
}, p.state.RuntimeArgs...)
if p.state.Exec {
args = append(args, "exec",
"-d",
"--process", filepath.Join(cwd, "process.json"),
"--console", p.consolePath,
)
} else if p.checkpoint != nil {
args = append(args, "restore",
"-d",
"--image-path", p.checkpointPath,
"--work-path", filepath.Join(p.checkpointPath, "criu.work", "restore-"+time.Now().Format(time.RFC3339)),
)
add := func(flags ...string) {
args = append(args, flags...)
}
// translate the checkpoint options into restore flags
if p.checkpoint.Shell {
add("--shell-job")
}
if p.checkpoint.TCP {
add("--tcp-established")
}
if p.checkpoint.UnixSockets {
add("--ext-unix-sk")
}
if p.state.NoPivotRoot {
add("--no-pivot")
}
for _, ns := range p.checkpoint.EmptyNS {
add("--empty-ns", ns)
}
} else {
args = append(args, "create",
"--bundle", p.bundle,
"--console", p.consolePath,
)
if p.state.NoPivotRoot {
args = append(args, "--no-pivot")
}
}
// every subcommand ends with the pid file location and the container id
args = append(args,
"--pid-file", filepath.Join(cwd, "pid"),
p.id,
)
cmd := exec.Command(p.runtime, args...)
cmd.Dir = p.bundle
// NOTE(review): in terminal mode openIO never sets these files, so they are
// nil *os.File values here — confirm that is the intended behavior.
cmd.Stdin = p.stdio.stdin
cmd.Stdout = p.stdio.stdout
cmd.Stderr = p.stdio.stderr
// Call out to setPDeathSig to set SysProcAttr as elements are platform specific
cmd.SysProcAttr = setPDeathSig()
if err := cmd.Start(); err != nil {
if exErr, ok := err.(*exec.Error); ok {
if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist {
return fmt.Errorf("%s not installed on system", p.runtime)
}
}
return err
}
// the shim's copies of the output files are closed once the runtime has started
p.stdio.stdout.Close()
p.stdio.stderr.Close()
if err := cmd.Wait(); err != nil {
if _, ok := err.(*exec.ExitError); ok {
return errRuntime
}
return err
}
// the runtime wrote the container pid into the "pid" file (see --pid-file above)
data, err := ioutil.ReadFile("pid")
if err != nil {
return err
}
pid, err := strconv.Atoi(string(data))
if err != nil {
return err
}
p.containerPid = pid
return nil
}
// pid returns the container pid recorded by create; it is zero until create
// has stored a value.
func (p *process) pid() int {
return p.containerPid
}
// delete asks the runtime to delete the container. It is a no-op for exec
// processes. When the runtime command fails, the returned error carries the
// command's combined output followed by the underlying error.
func (p *process) delete() error {
	if p.state.Exec {
		return nil
	}
	args := append(p.state.RuntimeArgs, "delete", p.id)
	cmd := exec.Command(p.runtime, args...)
	cmd.SysProcAttr = setPDeathSig()
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("%s: %v", out, err)
	}
	return nil
}
// openIO opens the pre-created fifo's for use with the container
// in RDWR so that they remain open if the other side stops listening
//
// In terminal mode a console is allocated and bytes are copied between the
// stdin/stdout paths and the pty master. Otherwise pipes are created through
// initializeIO and copied to and from the stdin/stdout/stderr paths. The
// output copy goroutines are tracked through the embedded WaitGroup.
func (p *process) openIO() error {
p.stdio = &stdio{}
var (
uid = p.state.RootUID
gid = p.state.RootGID
)
// Opened in the background, presumably because a write-only open can block.
// NOTE(review): p.stdinCloser is assigned from this goroutine without any
// synchronization while the shim's control loop reads it.
go func() {
if stdinCloser, err := os.OpenFile(p.state.Stdin, syscall.O_WRONLY, 0); err == nil {
p.stdinCloser = stdinCloser
}
}()
if p.state.Terminal {
master, console, err := newConsole(uid, gid)
if err != nil {
return err
}
p.console = master
p.consolePath = console
// NOTE(review): the error returns below leave master (and stdin) open.
stdin, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0)
if err != nil {
return err
}
go io.Copy(master, stdin)
stdout, err := os.OpenFile(p.state.Stdout, syscall.O_RDWR, 0)
if err != nil {
return err
}
p.Add(1)
go func() {
io.Copy(stdout, master)
master.Close()
p.Done()
}()
return nil
}
i, err := p.initializeIO(uid)
if err != nil {
return err
}
p.shimIO = i
// non-tty
// NOTE(review): the map is keyed by path, so if Stdout and Stderr name the
// same path only one of the two copy goroutines is started.
for name, dest := range map[string]func(f *os.File){
p.state.Stdout: func(f *os.File) {
p.Add(1)
go func() {
io.Copy(f, i.Stdout)
p.Done()
}()
},
p.state.Stderr: func(f *os.File) {
p.Add(1)
go func() {
io.Copy(f, i.Stderr)
p.Done()
}()
},
} {
f, err := os.OpenFile(name, syscall.O_RDWR, 0)
if err != nil {
return err
}
dest(f)
}
f, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0)
if err != nil {
return err
}
// forward stdin into the pipe and close the pipe's write side when the copy ends
go func() {
io.Copy(i.Stdin, f)
i.Stdin.Close()
}()
return nil
}
// IO holds all 3 standard io Reader/Writer (stdin,stdout,stderr).
// These are the shim's ends of the pipes created by initializeIO: Stdin is
// written to, Stdout and Stderr are read from.
type IO struct {
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
}
// initializeIO creates the three pipes used for the process's stdin, stdout
// and stderr when no terminal is requested.
//
// The ends handed to the runtime command are stored in p.stdio; the shim's
// ends are returned in the IO value. All pipe ends are chowned to rootuid and
// the process state's RootGID in case the container runs in a user namespace.
// If any step fails, every pipe end created so far is closed and a nil IO is
// returned with the error.
func (p *process) initializeIO(rootuid int) (i *IO, err error) {
	var files []*os.File
	i = &IO{}
	// cleanup in case of an error; closing through *os.File keeps the file
	// objects and their descriptors in sync.
	defer func() {
		if err != nil {
			for _, f := range files {
				f.Close()
			}
		}
	}()
	// STDIN
	r, w, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	files = append(files, r, w)
	p.stdio.stdin, i.Stdin = r, w
	// STDOUT
	if r, w, err = os.Pipe(); err != nil {
		return nil, err
	}
	files = append(files, r, w)
	p.stdio.stdout, i.Stdout = w, r
	// STDERR
	if r, w, err = os.Pipe(); err != nil {
		return nil, err
	}
	files = append(files, r, w)
	p.stdio.stderr, i.Stderr = w, r
	// change ownership of the pipes in case we are in a user namespace
	rootgid := p.state.RootGID
	for _, f := range files {
		if err := syscall.Fchown(int(f.Fd()), rootuid, rootgid); err != nil {
			return nil, err
		}
	}
	return i, nil
}
// Close closes the stdio files held for the runtime command.
func (p *process) Close() error {
return p.stdio.Close()
}
// stdio holds the files assigned to the runtime command's stdin, stdout and
// stderr.
type stdio struct {
stdin *os.File
stdout *os.File
stderr *os.File
}
// Close closes stdin, stdout and stderr, in that order. All three are always
// closed; the first error encountered is the one returned.
func (s *stdio) Close() error {
	var first error
	for _, f := range []*os.File{s.stdin, s.stdout, s.stderr} {
		if err := f.Close(); first == nil {
			first = err
		}
	}
	return first
}

View file

@ -1,15 +0,0 @@
// +build !solaris
package main
import (
"syscall"
)
// setPDeathSig returns process attributes whose parent death signal is
// SIGKILL, so that the container process also dies if the shim dies.
func setPDeathSig() *syscall.SysProcAttr {
	attr := new(syscall.SysProcAttr)
	attr.Pdeathsig = syscall.SIGKILL
	return attr
}

View file

@ -1,12 +0,0 @@
// +build solaris
package main
import (
"syscall"
)
// setPDeathSig returns no process attributes on Solaris, where Pdeathsig is
// not defined.
func setPDeathSig() *syscall.SysProcAttr {
	var attr *syscall.SysProcAttr // intentionally left nil
	return attr
}

View file

@ -1,543 +0,0 @@
package shim
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)
var (
// ErrContainerStartTimeout is returned by waitForCreate when no pid was
// reported before the timeout elapsed.
ErrContainerStartTimeout = errors.New("shim: container did not start before the specified timeout")
// ErrContainerNotStarted is returned by checkErrorLogs when the shim is no
// longer alive and the logs hold no more specific error.
ErrContainerNotStarted = errors.New("shim: container not started")
// ErrProcessNotExited is returned by Wait when the exit status file is
// missing or empty.
ErrProcessNotExited = errors.New("containerd: process has not exited")
// ErrShimExited is returned by Start when the shim finished without success
// before the runtime start command completed.
ErrShimExited = errors.New("containerd: shim exited before container process was started")
// errInvalidPidInt is returned by readPid when the pid file does not hold an integer.
errInvalidPidInt = errors.New("shim: process pid is invalid")
)
// UnknownStatus is the exit status reported when the real exit status of a
// process cannot be determined.
const UnknownStatus = 255
// processOpts carries everything newProcess needs to describe a process that
// will be launched through the shim binary.
type processOpts struct {
// root is the state directory for the process (process.json and the fifos live here).
root string
noPivotRoot bool
checkpoint string
c *containerd.Container
// cmd is the shim command that will be started for this process.
cmd *exec.Cmd
// exec is true for additional processes and false for the container's init.
exec bool
spec specs.Process
// stdin, stdout and stderr must be *os.File values whose names are the fifo paths.
stdin io.Reader
stdout io.Writer
stderr io.Writer
}
// newProcess prepares the on-disk state for a process that is about to be
// launched through the shim.
//
// It resolves the host root uid/gid from the container spec, extracts the
// fifo paths from the provided stdio values (each must be an *os.File),
// writes process.json into opts.root, and opens the "exit" and "control"
// fifos in that directory. If the control fifo cannot be opened, the already
// opened exit fifo is closed so its descriptor is not leaked.
func newProcess(opts processOpts) (*process, error) {
	var (
		spec                  = opts.c.Spec()
		stdin, stdout, stderr string
	)
	uid, gid, err := getRootIDs(spec)
	if err != nil {
		return nil, err
	}
	for _, t := range []struct {
		path *string
		v    interface{}
	}{
		{
			path: &stdin,
			v:    opts.stdin,
		},
		{
			path: &stdout,
			v:    opts.stdout,
		},
		{
			path: &stderr,
			v:    opts.stderr,
		},
	} {
		p, err := getFifoPath(t.v)
		if err != nil {
			return nil, err
		}
		*t.path = p
	}
	p := &process{
		root:        opts.root,
		cmd:         opts.cmd,
		done:        make(chan struct{}),
		spec:        opts.spec,
		exec:        opts.exec,
		rootUid:     uid,
		rootGid:     gid,
		noPivotRoot: opts.noPivotRoot,
		checkpoint:  opts.checkpoint,
		stdin:       stdin,
		stdout:      stdout,
		stderr:      stderr,
	}
	f, err := os.Create(filepath.Join(opts.root, "process.json"))
	if err != nil {
		return nil, err
	}
	err = json.NewEncoder(f).Encode(p)
	f.Close()
	if err != nil {
		return nil, err
	}
	exit, err := getExitPipe(filepath.Join(opts.root, "exit"))
	if err != nil {
		return nil, err
	}
	control, err := getControlPipe(filepath.Join(opts.root, "control"))
	if err != nil {
		// do not leak the exit fifo when the process cannot be set up
		exit.Close()
		return nil, err
	}
	p.exit, p.control = exit, control
	return p, nil
}
// process is containerd's handle on a process that runs behind a shim.
type process struct {
// root is the state directory holding process.json, pid, exitStatus and the fifos.
root string
// cmd is the shim command for this process.
cmd *exec.Cmd
// done is closed when the monitor reports an event for this process.
done chan struct{}
success bool
// startTime is the process start time read from /proc, used by same.
startTime string
// mu guards pid, startTime and success in the methods that lock it.
mu sync.Mutex
pid int
// exit and control are the opened "exit" and "control" fifos.
exit *os.File
control *os.File
spec specs.Process
noPivotRoot bool
exec bool
rootUid int
rootGid int
checkpoint string
// stdin, stdout and stderr are fifo paths.
stdin string
stdout string
stderr string
}
// processState is the JSON representation of a process as written to and
// read from process.json.
type processState struct {
specs.Process
Exec bool `json:"exec"`
RootUID int `json:"rootUID"`
RootGID int `json:"rootGID"`
Checkpoint string `json:"checkpoint"`
NoPivotRoot bool `json:"noPivotRoot"`
// NOTE(review): RuntimeArgs is never populated by process.MarshalJSON in this file.
RuntimeArgs []string `json:"runtimeArgs"`
Root string `json:"root"`
StartTime string `json:"startTime"`
// Stdin fifo filepath
Stdin string `json:"stdin"`
// Stdout fifo filepath
Stdout string `json:"stdout"`
// Stderr fifo filepath
Stderr string `json:"stderr"`
}
// MarshalJSON encodes the process as a processState document.
func (p *process) MarshalJSON() ([]byte, error) {
	return json.Marshal(processState{
		Process:     p.spec,
		Exec:        p.exec,
		RootUID:     p.rootUid,
		RootGID:     p.rootGid,
		Checkpoint:  p.checkpoint,
		NoPivotRoot: p.noPivotRoot,
		Root:        p.root,
		StartTime:   p.startTime,
		Stdin:       p.stdin,
		Stdout:      p.stdout,
		Stderr:      p.stderr,
	})
}
// UnmarshalJSON restores a process from its processState document.
//
// Besides copying the decoded fields it creates a fresh done channel, reads
// the pid from the "pid" file under the decoded root, and reopens the "exit"
// and "control" fifos there. If the control fifo cannot be opened, the
// already opened exit fifo is closed so its descriptor is not leaked.
func (p *process) UnmarshalJSON(b []byte) error {
	var ps processState
	if err := json.Unmarshal(b, &ps); err != nil {
		return err
	}
	p.spec = ps.Process
	p.noPivotRoot = ps.NoPivotRoot
	p.rootGid = ps.RootGID
	p.rootUid = ps.RootUID
	p.checkpoint = ps.Checkpoint
	p.exec = ps.Exec
	p.stdin = ps.Stdin
	p.stdout = ps.Stdout
	p.stderr = ps.Stderr
	p.root = ps.Root
	p.startTime = ps.StartTime
	p.done = make(chan struct{})
	pid, err := readPid(filepath.Join(p.root, "pid"))
	if err != nil {
		return err
	}
	p.pid = pid
	exit, err := getExitPipe(filepath.Join(p.root, "exit"))
	if err != nil {
		return err
	}
	control, err := getControlPipe(filepath.Join(p.root, "control"))
	if err != nil {
		// do not leak the exit fifo when loading fails
		exit.Close()
		return err
	}
	p.exit, p.control = exit, control
	return nil
}
// Pid returns the recorded pid of the process.
func (p *process) Pid() int {
return p.pid
}
// FD returns the file descriptor of the opened exit fifo.
func (p *process) FD() int {
return int(p.exit.Fd())
}
// Close closes the exit fifo.
func (p *process) Close() error {
return p.exit.Close()
}
// Remove always reports true.
// NOTE(review): presumably tells the monitor to drop the process after its
// event fires — confirm against the monitor package.
func (p *process) Remove() bool {
return true
}
// Wait blocks until the done channel is closed and then returns the exit
// status parsed from the exitStatus file under root.
//
// A missing or empty file yields UnknownStatus with ErrProcessNotExited.
// Whenever the function is about to return a non-nil error, the deferred
// call passes the results through handleSigkilledShim, which may replace them.
func (p *process) Wait() (rst uint32, rerr error) {
<-p.done
data, err := ioutil.ReadFile(filepath.Join(p.root, "exitStatus"))
// registered before any return below so that every error path is post-processed
defer func() {
if rerr != nil {
rst, rerr = p.handleSigkilledShim(rst, rerr)
}
}()
if err != nil {
if os.IsNotExist(err) {
return UnknownStatus, ErrProcessNotExited
}
return UnknownStatus, err
}
if len(data) == 0 {
return UnknownStatus, ErrProcessNotExited
}
i, err := strconv.ParseUint(string(data), 10, 32)
return uint32(i), err
}
// Signal writes a line of three integers to the control fifo: the constant 2,
// the signal s, and 0.
// NOTE(review): this presumably matches the shim's control protocol, where
// message type 2 means "signal" — confirm against the shim binary.
func (p *process) Signal(s os.Signal) error {
_, err := fmt.Fprintf(p.control, "%d %d %d\n", 2, s, 0)
return err
}
// same reports whether the pid recorded on disk still belongs to the process
// that was originally launched, judged by comparing start times.
//
// When no start time was recorded it reports true for backwards
// compatibility. An unreadable pid file yields (false, nil); a failure to
// read the current start time yields false together with that error.
func (p *process) same() (bool, error) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.startTime == "" {
		// for backwards compat assume true if it is not set
		return true, nil
	}
	pid, err := readPid(filepath.Join(p.root, "pid"))
	if err != nil {
		return false, nil
	}
	current, err := readProcessStartTime(pid)
	if err != nil {
		return false, err
	}
	if current != p.startTime {
		return false, nil
	}
	return true, nil
}
// checkExited waits for the shim command to finish and then makes sure the
// container process does not outlive it.
//
// A clean exit marks the process successful; if it was already marked
// successful the function returns immediately. Otherwise, when the recorded
// pid still refers to the original process, that process is sent SIGKILL and
// the function polls until signalling the pid fails.
func (p *process) checkExited() {
err := p.cmd.Wait()
if err == nil {
p.mu.Lock()
if p.success {
p.mu.Unlock()
return
}
p.success = true
p.mu.Unlock()
}
if same, _ := p.same(); same && p.hasPid() {
// The process changed its PR_SET_PDEATHSIG, so force kill it
logrus.Infof("containerd: (pid %v) has become an orphan, killing it", p.pid)
// ESRCH is tolerated: the process may already be gone
if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
logrus.Errorf("containerd: unable to SIGKILL (pid %v): %v", p.pid, err)
return
}
// wait for the container process to exit
for {
// signal 0 only probes for existence; any error ends the wait
if err := unix.Kill(p.pid, 0); err != nil {
break
}
time.Sleep(5 * time.Millisecond)
}
}
}
// hasPid reports, under the process mutex, whether a positive pid has been
// recorded for the process.
func (p *process) hasPid() bool {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.pid > 0
}
// pidResponse is the result readContainerPid delivers on its channel: either
// the container pid or the error that stopped the lookup.
type pidResponse struct {
pid int
err error
}
// waitForCreate waits up to timeout for the container pid to become
// available.
//
// On success it records the pid and the process start time and rewrites
// process.json; failures to persist that state are only logged. If the
// timeout fires first, the shim command is killed and
// ErrContainerStartTimeout is returned.
func (p *process) waitForCreate(timeout time.Duration) error {
r := make(chan pidResponse, 1)
go p.readContainerPid(r)
select {
case resp := <-r:
if resp.err != nil {
return resp.err
}
// the mutex is released manually on each return path below
p.mu.Lock()
p.pid = resp.pid
started, err := readProcessStartTime(resp.pid)
if err != nil {
if os.IsNotExist(err) {
// process already exited
p.success = true
p.mu.Unlock()
return nil
}
logrus.Warnf("shim: unable to save starttime: %v", err)
}
p.startTime = started
// persist the start time so a later load can tell whether the pid was reused
f, err := os.Create(filepath.Join(p.root, "process.json"))
if err != nil {
logrus.Warnf("shim: unable to create process.json: %v", err)
p.mu.Unlock()
return nil
}
defer f.Close()
if err := json.NewEncoder(f).Encode(p); err != nil {
logrus.Warnf("shim: unable to encode process: %v", err)
}
p.mu.Unlock()
return nil
case <-time.After(timeout):
p.cmd.Process.Kill()
p.cmd.Wait()
return ErrContainerStartTimeout
}
}
// readContainerPid polls the pid file under root until it holds a valid pid
// and sends exactly one pidResponse on r.
//
// While the file is missing or does not yet contain an integer, the shim and
// runtime logs are checked for a failure; any error other than "not exist"
// from that check is reported. Other read errors are reported as-is.
// Polling happens every 15 milliseconds.
func (p *process) readContainerPid(r chan pidResponse) {
	var (
		pidFile    = filepath.Join(p.root, "pid")
		shimLog    = filepath.Join(p.root, "shim-log.json")
		runtimeLog = filepath.Join(p.root, "log.json")
	)
	for {
		pid, err := readPid(pidFile)
		if err == nil {
			r <- pidResponse{pid: pid}
			return
		}
		if !os.IsNotExist(err) && err != errInvalidPidInt {
			r <- pidResponse{err: err}
			return
		}
		// pid not available yet: find out whether the shim has already failed
		serr := checkErrorLogs(p.cmd, shimLog, runtimeLog)
		if serr != nil && !os.IsNotExist(serr) {
			r <- pidResponse{err: serr}
			return
		}
		time.Sleep(15 * time.Millisecond)
	}
}
// handleSigkilledShim is invoked by Wait when no usable exit status could be
// read. It inspects the recorded pid to decide which status to report.
//
// If the pid no longer exists, or no longer refers to the original process,
// UnknownStatus is written to the exitStatus file and returned. If the
// process's parent pid is 1, the process is killed and 128+SIGKILL is
// written and returned. Otherwise rst and rerr are returned unchanged.
func (p *process) handleSigkilledShim(rst uint32, rerr error) (uint32, error) {
if err := unix.Kill(p.pid, 0); err == syscall.ESRCH {
logrus.Warnf("containerd: (pid %d) does not exist", p.pid)
// The process died while containerd was down (probably of
// SIGKILL, but no way to be sure)
return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus)
}
// If it's not the same process, just mark it stopped and set
// the status to the UnknownStatus value (i.e. 255)
if same, _ := p.same(); !same {
// Create the file so we get the exit event generated once monitor kicks in
// without having to go through all this process again
return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus)
}
// field 4 of /proc/<pid>/stat is the parent pid
ppid, err := readProcStatField(p.pid, 4)
if err != nil {
return rst, fmt.Errorf("could not check process ppid: %v (%v)", err, rerr)
}
// a parent pid of 1 means the process has been reparented to init
if ppid == "1" {
if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
return UnknownStatus, fmt.Errorf(
"containerd: unable to SIGKILL (pid %v): %v", p.pid, err)
}
// wait for the process to die
for {
if err := unix.Kill(p.pid, 0); err == syscall.ESRCH {
break
}
time.Sleep(5 * time.Millisecond)
}
// Create the file so we get the exit event generated once monitor kicks in
// without having to go through all this process again
status := 128 + uint32(syscall.SIGKILL)
return status, writeExitStatus(filepath.Join(p.root, "exitStatus"), status)
}
return rst, rerr
}
// checkErrorLogs looks for the reason a shim died.
//
// It returns nil while the shim behind cmd is still alive. Once the shim is
// gone, the first "error" record in the shim log is returned as a shim error;
// failing that, the first "error" record in the runtime log is returned as
// an oci runtime error. When neither log explains the failure, or the
// runtime log does not exist, ErrContainerNotStarted is returned.
func checkErrorLogs(cmd *exec.Cmd, shimLogPath, runtimeLogPath string) error {
	alive, err := isAlive(cmd)
	if err != nil {
		return err
	}
	if alive {
		return nil
	}
	// runc could have failed to run the container so lets get the error
	// out of the logs or the shim could have encountered an error
	shimMessages, err := readLogMessages(shimLogPath)
	if err != nil {
		return err
	}
	for _, entry := range shimMessages {
		if entry.Level == "error" {
			return fmt.Errorf("shim error: %v", entry.Msg)
		}
	}
	// no errors reported back from shim, check for runc/runtime errors
	runtimeMessages, err := readLogMessages(runtimeLogPath)
	if os.IsNotExist(err) {
		return ErrContainerNotStarted
	}
	if err != nil {
		return err
	}
	for _, entry := range runtimeMessages {
		if entry.Level == "error" {
			return fmt.Errorf("oci runtime error: %v", entry.Msg)
		}
	}
	return ErrContainerNotStarted
}
// readProcessStartTime returns field 22 of /proc/<pid>/stat, which this
// package uses as the process start time.
func readProcessStartTime(pid int) (string, error) {
return readProcStatField(pid, 22)
}
// readProcStatField returns one field of /proc/<pid>/stat as a string.
//
// Fields are numbered from 1 as in the proc manual: field 1 is the pid and
// field 2 the command name without its surrounding parentheses. The command
// name may itself contain spaces and parentheses, so it is delimited by the
// first " (" and the last ") " in the file; the remaining fields are the
// whitespace-separated values after it.
//
// An error is returned when the file cannot be read, when its contents do
// not have the expected shape, or when field is out of range.
func readProcStatField(pid int, field int) (string, error) {
	data, err := ioutil.ReadFile(filepath.Join(string(filepath.Separator), "proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return "", err
	}
	if field < 1 {
		return "", fmt.Errorf("invalid stat field %d for pid %d", field, pid)
	}
	stat := string(data)
	nameStart := strings.Index(stat, " (")
	nameEnd := strings.LastIndex(stat, ") ")
	if nameStart < 0 || nameEnd < nameStart+2 {
		return "", fmt.Errorf("malformed stat data for pid %d", pid)
	}
	switch field {
	case 1:
		return stat[:nameStart], nil
	case 2:
		return stat[nameStart+2 : nameEnd], nil
	}
	// The first value after the command name is field 3.
	rest := strings.Fields(stat[nameEnd+2:])
	idx := field - 3
	if idx >= len(rest) {
		return "", fmt.Errorf("stat field %d not present for pid %d", field, pid)
	}
	return rest[idx], nil
}
// readPid parses the integer stored in pidFile. It returns -1 together with
// the read error when the file cannot be read, and -1 with errInvalidPidInt
// when the contents are not an integer.
func readPid(pidFile string) (int, error) {
	raw, readErr := ioutil.ReadFile(pidFile)
	if readErr != nil {
		return -1, readErr
	}
	pid, convErr := strconv.Atoi(string(raw))
	if convErr != nil {
		return -1, errInvalidPidInt
	}
	return pid, nil
}
// isAlive checks if the shim that launched the container is still alive.
//
// It first polls the child without blocking. A successful poll that returns
// the child's own pid means the child has exited and was just reaped, so it
// is reported as not alive; a successful poll returning no pid means it is
// still running. When polling fails (for example because the process is not
// a child of the caller), the process is probed with signal 0: ESRCH means
// it is gone, any other error is returned.
func isAlive(cmd *exec.Cmd) (bool, error) {
	pid := cmd.Process.Pid
	if wpid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil); err == nil {
		return wpid != pid, nil
	}
	if err := syscall.Kill(pid, 0); err != nil {
		if err == syscall.ESRCH {
			return false, nil
		}
		return false, err
	}
	return true, nil
}
// message is a single JSON log record as read from the shim and runtime log
// files; only the level and the message text are decoded.
type message struct {
Level string `json:"level"`
Msg string `json:"msg"`
}
// readLogMessages decodes the stream of JSON log records stored at path and
// returns them in file order. Any open or decode error other than end of
// file discards the records read so far and is returned.
func readLogMessages(path string) ([]message, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	var (
		messages []message
		decoder  = json.NewDecoder(file)
	)
	for {
		var entry message
		err := decoder.Decode(&entry)
		if err == io.EOF {
			return messages, nil
		}
		if err != nil {
			return nil, err
		}
		messages = append(messages, entry)
	}
}
// getExitPipe makes sure a fifo exists at path (an already existing one is
// fine) and opens it read-only.
func getExitPipe(path string) (*os.File, error) {
	err := unix.Mkfifo(path, 0755)
	if err != nil && !os.IsExist(err) {
		return nil, err
	}
	// add NONBLOCK in case the other side has already closed or else
	// this function would never return
	const flags = syscall.O_RDONLY | syscall.O_NONBLOCK
	return os.OpenFile(path, flags, 0)
}
// getControlPipe makes sure a fifo exists at path (an already existing one
// is fine) and opens it read-write in non-blocking mode.
func getControlPipe(path string) (*os.File, error) {
	err := unix.Mkfifo(path, 0755)
	if err != nil && !os.IsExist(err) {
		return nil, err
	}
	const flags = syscall.O_RDWR | syscall.O_NONBLOCK
	return os.OpenFile(path, flags, 0)
}
// writeExitStatus stores status at path as plain decimal text with mode
// 0644, the format Wait parses back with strconv.ParseUint.
func writeExitStatus(path string, status uint32) error {
	text := strconv.FormatUint(uint64(status), 10)
	return ioutil.WriteFile(path, []byte(text), 0644)
}

View file

@ -1,430 +0,0 @@
package shim
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"sync"
"syscall"
"time"
"github.com/docker/containerd"
"github.com/docker/containerd/executors/oci"
"github.com/docker/containerd/monitor"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
/*
libcontainerd
└── containerd
    └── ff2e86955c2be43f0e3c300fbd3786599301bd8efcaa5a386587f132e73af242
        ├── init
        │   ├── control
        │   ├── exit
        │   ├── log.json
        │   ├── pid
        │   ├── process.json
        │   ├── shim-log.json
        │   └── starttime
        └── state.json
*/
var (
// ErrNotFifo is returned by getFifoPath when the provided IO value is not an
// *os.File or has an empty name.
ErrNotFifo = errors.New("shim: IO is not a valid fifo on disk")
// errInitProcessNotExist is returned by getContainerInit when no "init"
// process is registered.
errInitProcessNotExist = errors.New("shim: init process does not exist")
)
// Opts holds the settings used by New to create a Shim.
type Opts struct {
// Name is the shim binary that is executed for every process.
Name string
// RuntimeName, RuntimeArgs and RuntimeRoot configure the OCI runtime.
RuntimeName string
RuntimeArgs []string
RuntimeRoot string
// NOTE(review): New does not copy NoPivotRoot into the Shim it returns.
NoPivotRoot bool
// Root is the state directory created for this shim.
Root string
// Timeout bounds how long startCommand waits for the container pid.
Timeout time.Duration
}
// New creates a Shim rooted at opts.Root.
//
// The parent of the root directory is created as needed, while the root
// itself must not exist yet (os.Mkdir fails otherwise). New also sets up the
// OCI runtime and a monitor, starts the monitor loop in the background, and
// writes the shim's state to state.json inside the root.
func New(opts Opts) (*Shim, error) {
if err := os.MkdirAll(filepath.Dir(opts.Root), 0711); err != nil {
return nil, err
}
if err := os.Mkdir(opts.Root, 0711); err != nil {
return nil, err
}
r, err := oci.New(oci.Opts{
Root: opts.RuntimeRoot,
Name: opts.RuntimeName,
Args: opts.RuntimeArgs,
})
if err != nil {
return nil, err
}
m, err := monitor.New()
if err != nil {
return nil, err
}
s := &Shim{
root: opts.Root,
name: opts.Name,
timeout: opts.Timeout,
runtime: r,
processes: make(map[string]*process),
m: m,
}
// NOTE(review): the monitor goroutine keeps running if creating state.json fails below.
go s.startMonitor()
f, err := os.Create(filepath.Join(opts.Root, "state.json"))
if err != nil {
return nil, err
}
err = json.NewEncoder(f).Encode(s)
f.Close()
return s, err
}
// Load will load an existing shim with all its information restored from the
// provided path
//
// The shim itself is decoded from state.json in root. Every sub-directory of
// root is then treated as a process directory: its process.json is decoded,
// and the process is registered under the directory name and added to a
// newly created monitor.
func Load(root string) (*Shim, error) {
f, err := os.Open(filepath.Join(root, "state.json"))
if err != nil {
return nil, err
}
var s Shim
err = json.NewDecoder(f).Decode(&s)
f.Close()
if err != nil {
return nil, err
}
m, err := monitor.New()
if err != nil {
return nil, err
}
s.m = m
// NOTE(review): the monitor goroutine keeps running on the error returns below.
go s.startMonitor()
dirs, err := ioutil.ReadDir(root)
if err != nil {
return nil, err
}
for _, d := range dirs {
// plain files such as state.json are skipped
if !d.IsDir() {
continue
}
name := d.Name()
if f, err = os.Open(filepath.Join(root, name, "process.json")); err != nil {
return nil, err
}
var p process
err = json.NewDecoder(f).Decode(&p)
f.Close()
if err != nil {
return nil, err
}
s.processes[name] = &p
if err := s.m.Add(&p); err != nil {
return nil, err
}
}
return &s, nil
}
// Shim is a container runtime that adds a shim process as the container's parent
// to hold open stdio and other resources so that higher level daemons can exit and
// load running containers for handling upgrades and/or crashes
//
// The shim uses an OCI compliant runtime as its executor
type Shim struct {
// root holds runtime state information for the containers
// launched by the runtime
root string
// name is the shim binary executed for every process
name string
// timeout bounds how long to wait for a container pid after starting a shim
timeout time.Duration
noPivotRoot bool
runtime *oci.OCIRuntime
// pmu guards processes
pmu sync.Mutex
// processes maps a process directory name ("init" for the container's
// init process) to its process
processes map[string]*process
// bundle is the container's bundle path, set by Create
bundle string
checkpoint string
// m delivers the events that close a process's done channel
m *monitor.Monitor
}
// state is the JSON representation of a Shim as stored in state.json.
type state struct {
// Root is the shim's state directory
Root string `json:"root"`
// Bundle is the path to the container's bundle
Bundle string `json:"bundle"`
// OCI runtime binary name
Runtime string `json:"runtime"`
// OCI runtime args
RuntimeArgs []string `json:"runtimeArgs"`
// OCI runtime root directory
RuntimeRoot string `json:"runtimeRoot"`
// Shim binary name
Name string `json:"shim"`
// NoPivotRoot option
NoPivotRoot bool `json:"noPivotRoot"`
// Timeout for container start
Timeout time.Duration `json:"timeout"`
}
// MarshalJSON encodes the shim as a state document, taking the runtime's
// name, args and root from the configured OCI runtime.
func (s *Shim) MarshalJSON() ([]byte, error) {
	return json.Marshal(state{
		Root:        s.root,
		Bundle:      s.bundle,
		Runtime:     s.runtime.Name(),
		RuntimeArgs: s.runtime.Args(),
		RuntimeRoot: s.runtime.Root(),
		Name:        s.name,
		NoPivotRoot: s.noPivotRoot,
		Timeout:     s.timeout,
	})
}
// UnmarshalJSON restores the shim from a state document. The OCI runtime is
// rebuilt from the stored name, args and root, and the process table starts
// out empty.
func (s *Shim) UnmarshalJSON(b []byte) error {
	var decoded state
	if err := json.Unmarshal(b, &decoded); err != nil {
		return err
	}
	s.root, s.name, s.bundle = decoded.Root, decoded.Name, decoded.Bundle
	s.timeout, s.noPivotRoot = decoded.Timeout, decoded.NoPivotRoot
	runtime, err := oci.New(oci.Opts{
		Name: decoded.Runtime,
		Args: decoded.RuntimeArgs,
		Root: decoded.RuntimeRoot,
	})
	if err != nil {
		return err
	}
	s.runtime = runtime
	s.processes = make(map[string]*process)
	return nil
}
// Create launches the shim for the container's init process.
//
// It creates the "init" directory under the shim root, starts the shim
// binary there in its own process group, registers the resulting process as
// "init", and rewrites state.json so that the bundle path is persisted.
func (s *Shim) Create(c *containerd.Container) (containerd.ProcessDelegate, error) {
s.bundle = c.Path()
var (
root = filepath.Join(s.root, "init")
// the shim receives the container id, the bundle path and the runtime name
cmd = s.command(c.ID(), c.Path(), s.runtime.Name())
)
if err := os.Mkdir(root, 0711); err != nil {
return nil, err
}
// exec the shim inside the state directory setup with the process
// information for what is being run
cmd.Dir = root
// make sure the shim is in a new process group
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
p, err := s.startCommand(processOpts{
spec: c.Spec().Process,
root: root,
noPivotRoot: s.noPivotRoot,
checkpoint: s.checkpoint,
c: c,
cmd: cmd,
stdin: c.Stdin,
stdout: c.Stdout,
stderr: c.Stderr,
})
if err != nil {
return nil, err
}
s.pmu.Lock()
s.processes["init"] = p
s.pmu.Unlock()
f, err := os.Create(filepath.Join(s.root, "state.json"))
if err != nil {
return nil, err
}
err = json.NewEncoder(f).Encode(s)
f.Close()
// ~TODO: oom and stats stuff here
return p, err
}
// Start runs the runtime's "start" command for the container.
//
// The command runs in a goroutine that reports exactly one result on errC,
// so it can never block on a second send after a failure. Start returns that
// result unless the init process's done channel is closed first: if the shim
// finished without success the start command is killed and ErrShimExited is
// returned, otherwise Start waits for the command's result.
//
// A failing start command yields an error containing the command error and
// its quoted combined output.
func (s *Shim) Start(c *containerd.Container) error {
	p, err := s.getContainerInit()
	if err != nil {
		return err
	}
	var (
		errC = make(chan error, 1)
		cmd  = s.runtime.Command("start", c.ID())
	)
	go func() {
		out, err := cmd.CombinedOutput()
		if err != nil {
			errC <- fmt.Errorf("%s: %q", err, out)
			return
		}
		errC <- nil
	}()
	select {
	case err := <-errC:
		return err
	case <-p.done:
		// success is written under p.mu by the process bookkeeping, so read
		// it under the same lock.
		p.mu.Lock()
		success := p.success
		p.mu.Unlock()
		if !success {
			if cmd.Process != nil {
				cmd.Process.Kill()
			}
			cmd.Wait()
			return ErrShimExited
		}
		return <-errC
	}
}
// Delete removes the container through the runtime and, only if that
// succeeds, removes the shim's root directory with everything in it.
func (s *Shim) Delete(c *containerd.Container) error {
	err := s.runtime.Delete(c)
	if err == nil {
		err = os.RemoveAll(s.root)
	}
	return err
}
// Exec launches a shim for an additional process in the container.
//
// A fresh, uniquely named directory is created under the shim root to hold
// the process state; the process is registered under that directory's base
// name.
func (s *Shim) Exec(c *containerd.Container, p *containerd.Process) (containerd.ProcessDelegate, error) {
root, err := ioutil.TempDir(s.root, "")
if err != nil {
return nil, err
}
// the shim receives the container id, the bundle path and the runtime name
cmd := s.command(c.ID(), c.Path(), s.runtime.Name())
// exec the shim inside the state directory setup with the process
// information for what is being run
cmd.Dir = root
// make sure the shim is in a new process group
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
sp, err := s.startCommand(processOpts{
exec: true,
spec: *p.Spec(),
root: root,
noPivotRoot: s.noPivotRoot,
checkpoint: s.checkpoint,
c: c,
cmd: cmd,
stdin: p.Stdin,
stdout: p.Stdout,
stderr: p.Stderr,
})
if err != nil {
return nil, err
}
s.pmu.Lock()
s.processes[filepath.Base(root)] = sp
s.pmu.Unlock()
return sp, nil
}
// Load returns the container's init process. The id argument is not used.
func (s *Shim) Load(id string) (containerd.ProcessDelegate, error) {
return s.getContainerInit()
}
// getContainerInit looks up the process registered as "init" while holding
// the process table lock. It returns errInitProcessNotExist when no such
// process is registered.
func (s *Shim) getContainerInit() (*process, error) {
	s.pmu.Lock()
	defer s.pmu.Unlock()
	if initProcess, found := s.processes["init"]; found {
		return initProcess, nil
	}
	return nil, errInitProcessNotExist
}
// startCommand prepares the process state, registers the process with the
// monitor, starts the shim command, and waits up to the shim's timeout for
// the container pid to appear.
//
// Once the command has been started, checkExited is always launched in the
// background when this function returns, whether or not the wait succeeded.
func (s *Shim) startCommand(opts processOpts) (*process, error) {
p, err := newProcess(opts)
if err != nil {
return nil, err
}
if err := s.m.Add(p); err != nil {
return nil, err
}
if err := opts.cmd.Start(); err != nil {
// the shim never ran, so unblock anything waiting on the process
close(p.done)
if checkShimNotFound(err) {
return nil, fmt.Errorf("%s not install on system", s.name)
}
return nil, err
}
// make sure it does not die before we get the container's pid
defer func() {
go p.checkExited()
}()
if err := p.waitForCreate(s.timeout); err != nil {
return nil, err
}
return p, nil
}
// command returns an unstarted command that runs the shim binary with args.
func (s *Shim) command(args ...string) *exec.Cmd {
return exec.Command(s.name, args...)
}
// startMonitor runs the monitor in the background and closes the done
// channel of every process for which the monitor delivers an event. The
// monitor is closed when the event stream ends.
func (s *Shim) startMonitor() {
go s.m.Run()
defer s.m.Close()
for m := range s.m.Events() {
// only *process values are added to the monitor in this package
p := m.(*process)
close(p.done)
}
}
// checkShimNotFound reports whether err, as returned from an exec call,
// means that the binary does not exist on the system. Only *exec.Error
// values wrapping exec.ErrNotFound or os.ErrNotExist qualify.
func checkShimNotFound(err error) bool {
	execErr, ok := err.(*exec.Error)
	if !ok {
		return false
	}
	switch execErr.Err {
	case exec.ErrNotFound, os.ErrNotExist:
		return true
	}
	return false
}
// getFifoPath returns the on-disk path of the fifo behind v. The value must
// be an *os.File with a non-empty Name(); anything else yields ErrNotFifo.
func getFifoPath(v interface{}) (string, error) {
	if file, ok := v.(*os.File); ok {
		if name := file.Name(); name != "" {
			return name, nil
		}
	}
	return "", ErrNotFifo
}
// getRootIDs returns the host uid and gid that container id 0 maps to.
// It returns 0, 0 when the spec is nil or does not request a user namespace;
// otherwise the ids are looked up in the spec's uid and gid mappings. The
// error result is always nil.
func getRootIDs(s *specs.Spec) (int, int, error) {
	if s == nil {
		return 0, 0, nil
	}
	for _, ns := range s.Linux.Namespaces {
		if ns.Type != specs.UserNamespace {
			continue
		}
		uid := hostIDFromMap(0, s.Linux.UIDMappings)
		gid := hostIDFromMap(0, s.Linux.GIDMappings)
		return uid, gid, nil
	}
	return 0, 0, nil
}
// hostIDFromMap translates the container id into a host id using the first
// mapping in mp whose range [ContainerID, ContainerID+Size) contains it. It
// returns 0 when no mapping covers id.
//
// The range test is written as an offset comparison so that a mapping with
// Size 0 matches nothing and a range reaching the top of the uint32 space
// does not wrap around.
func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
	for _, m := range mp {
		if id >= m.ContainerID && id-m.ContainerID < m.Size {
			return int(m.HostID + (id - m.ContainerID))
		}
	}
	return 0
}