diff --git a/containerd-shim/main.go b/containerd-shim/main.go index fbfc686..85d5f1e 100644 --- a/containerd-shim/main.go +++ b/containerd-shim/main.go @@ -106,7 +106,7 @@ func start(log *os.File) error { case s := <-signals: switch s { case syscall.SIGCHLD: - exits, _ := osutils.Reap() + exits, _ := osutils.Reap(false) for _, e := range exits { // check to see if runtime is one of the processes that has exited if e.Pid == p.pid() { @@ -117,6 +117,9 @@ func start(log *os.File) error { } // runtime has exited so the shim can also exit if exitShim { + // Wait for all the childs this process may have created + // (only needed for exec, but it won't hurt when done on init) + osutils.Reap(true) // Let containerd take care of calling the runtime delete f.Close() p.Wait() diff --git a/containerd/main.go b/containerd/main.go index 9231e86..fec3666 100644 --- a/containerd/main.go +++ b/containerd/main.go @@ -23,7 +23,6 @@ import ( "github.com/docker/containerd/api/grpc/server" "github.com/docker/containerd/api/grpc/types" "github.com/docker/containerd/api/http/pprof" - "github.com/docker/containerd/osutils" "github.com/docker/containerd/supervisor" "github.com/docker/docker/pkg/listeners" "github.com/rcrowley/go-metrics" @@ -160,7 +159,6 @@ func main() { func daemon(context *cli.Context) error { s := make(chan os.Signal, 2048) signal.Notify(s, syscall.SIGTERM, syscall.SIGINT) - osutils.SetSubreaper(1) sv, err := supervisor.New( context.String("state-dir"), context.String("runtime"), diff --git a/osutils/reaper.go b/osutils/reaper.go index ab7d24d..6a80335 100644 --- a/osutils/reaper.go +++ b/osutils/reaper.go @@ -12,13 +12,17 @@ type Exit struct { // Reap reaps all child processes for the calling process and returns their // exit information -func Reap() (exits []Exit, err error) { +func Reap(wait bool) (exits []Exit, err error) { var ( ws syscall.WaitStatus rus syscall.Rusage ) + flag := syscall.WNOHANG + if wait { + flag = 0 + } for { - pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus) + pid, err := syscall.Wait4(-1, &ws, flag, &rus) if err != nil { if err == syscall.ECHILD { return exits, nil diff --git a/runtime/container.go b/runtime/container.go index 9e82a0c..c4a2f7e 100644 --- a/runtime/container.go +++ b/runtime/container.go @@ -14,6 +14,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/containerd/specs" ocs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" ) // Container defines the operations allowed on a container @@ -480,12 +481,33 @@ func (c *container) createCmd(pid string, cmd *exec.Cmd, p *process) error { } return err } - go func() { - err := p.cmd.Wait() - if err == nil { - p.cmdSuccess = true - } - close(p.cmdDoneCh) + // We need the pid file to have been written to run + defer func() { + go func() { + err := p.cmd.Wait() + if err == nil { + p.cmdSuccess = true + } + + if same, err := p.isSameProcess(); same && p.pid > 0 { + // The process changed its PR_SET_PDEATHSIG, so force + // kill it + logrus.Infof("containerd: %s:%s (pid %v) has become an orphan, killing it", p.container.id, p.id, p.pid) + err = unix.Kill(p.pid, syscall.SIGKILL) + if err != nil && err != syscall.ESRCH { + logrus.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.id, p.pid, err) + } else { + for { + err = unix.Kill(p.pid, 0) + if err != nil { + break + } + time.Sleep(5 * time.Millisecond) + } + } + } + close(p.cmdDoneCh) + }() }() if err := c.waitForCreate(p, cmd); err != nil { return err diff --git a/runtime/process.go b/runtime/process.go index 56a5082..cba1eab 100644 --- a/runtime/process.go +++ b/runtime/process.go @@ -228,24 +228,31 @@ func (p *process) Resize(w, h int) error { return err } +func (p *process) updateExitStatusFile(status int) (int, error) { + p.stateLock.Lock() + p.state = Stopped + p.stateLock.Unlock() + err := ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte(fmt.Sprintf("%d", status)), 0644) + return status, err +} + func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) { if p.cmd == nil || p.cmd.Process == nil { e := unix.Kill(p.pid, 0) if e == syscall.ESRCH { - return rst, rerr + logrus.Warnf("containerd: %s:%s (pid %d) does not exist", p.container.id, p.id, p.pid) + // The process died while containerd was down (probably of + // SIGKILL, but no way to be sure) + return p.updateExitStatusFile(255) } // If it's not the same process, just mark it stopped and set // the status to 255 if same, err := p.isSameProcess(); !same { logrus.Warnf("containerd: %s:%s (pid %d) is not the same process anymore (%v)", p.container.id, p.id, p.pid, err) - p.stateLock.Lock() - p.state = Stopped - p.stateLock.Unlock() // Create the file so we get the exit event generated once monitor kicks in - // without going to this all process again - rerr = ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte("255"), 0644) - return 255, nil + // without having to go through all this process again + return p.updateExitStatusFile(255) } ppid, err := readProcStatField(p.pid, 4) @@ -255,19 +262,21 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) { if ppid == "1" { logrus.Warnf("containerd: %s:%s shim died, killing associated process", p.container.id, p.id) unix.Kill(p.pid, syscall.SIGKILL) + if err != nil && err != syscall.ESRCH { + return 255, fmt.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.id, p.pid, err) + } + // wait for the process to die for { e := unix.Kill(p.pid, 0) if e == syscall.ESRCH { break } - time.Sleep(10 * time.Millisecond) + time.Sleep(5 * time.Millisecond) } - - rst = 128 + int(syscall.SIGKILL) // Create the file so we get the exit event generated once monitor kicks in - // without going to this all process again - rerr = ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte(fmt.Sprintf("%d", rst)), 0644) + // without having to go through all this process again + return p.updateExitStatusFile(128 + int(syscall.SIGKILL)) } return rst, rerr @@ -286,29 +295,8 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) { if shimStatus.Signaled() && shimStatus.Signal() == syscall.SIGKILL { logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): shim was SIGKILL'ed reaping its child with pid %d", p.container.id, p.id, p.pid) - var ( - status unix.WaitStatus - rusage unix.Rusage - wpid int - ) - - // Some processes change their PR_SET_PDEATHSIG, so force kill them - unix.Kill(p.pid, syscall.SIGKILL) - - for wpid == 0 { - wpid, e = unix.Wait4(p.pid, &status, unix.WNOHANG, &rusage) - if e != nil { - logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): Wait4(%d): %v", p.container.id, p.id, p.pid, rerr) - return rst, rerr - } - } - - if wpid == p.pid { - rerr = nil - rst = 128 + int(shimStatus.Signal()) - } else { - logrus.Errorf("containerd: ExitStatus(container: %s, process: %s): unexpected returned pid from wait4 %v (expected %v)", p.container.id, p.id, wpid, p.pid) - } + rerr = nil + rst = 128 + int(shimStatus.Signal()) p.stateLock.Lock() p.state = Stopped diff --git a/supervisor/exit.go b/supervisor/exit.go index 442327d..e20ee47 100644 --- a/supervisor/exit.go +++ b/supervisor/exit.go @@ -73,13 +73,19 @@ func (s *Supervisor) execExit(t *ExecExitTask) error { if err := container.RemoveProcess(t.PID); err != nil { logrus.WithField("error", err).Error("containerd: find container for pid") } - t.Process.Wait() - s.notifySubscribers(Event{ - Timestamp: time.Now(), - ID: t.ID, - Type: StateExit, - PID: t.PID, - Status: t.Status, - }) + // If the exec spawned children which are still using its IO + // waiting here will block until they die or close their IO + // descriptors. + // Hence, we use a go routine to avoid block all other operations + go func() { + t.Process.Wait() + s.notifySubscribers(Event{ + Timestamp: time.Now(), + ID: t.ID, + Type: StateExit, + PID: t.PID, + Status: t.Status, + }) + }() return nil }