From 654c537d38a05ff7d5dea567d2747a1bfea1cb5a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 24 Jun 2016 12:02:53 -0700 Subject: [PATCH] Call start in containerd This fixes a sync issue when the containerd api returns after a container has started. It fixes it by calling the runtime start inside containerd after the oom handler has been setup. Signed-off-by: Michael Crosby --- containerd-shim/main.go | 7 ------- containerd-shim/process.go | 20 -------------------- integration-test/start_test.go | 11 ++++++++++- runtime/process.go | 29 +++++++++++++++++++++++++++-- runtime/runtime.go | 3 +++ supervisor/worker.go | 14 ++++++++++++++ 6 files changed, 54 insertions(+), 30 deletions(-) diff --git a/containerd-shim/main.go b/containerd-shim/main.go index 441cb3a..cea7b5e 100644 --- a/containerd-shim/main.go +++ b/containerd-shim/main.go @@ -136,13 +136,6 @@ func start(log *os.File) error { Height: uint16(msg.Height), } term.SetWinsize(p.console.Fd(), &ws) - case 2: - // tell runtime to execute the init process - if err := p.start(); err != nil { - p.delete() - p.Wait() - return err - } } } } diff --git a/containerd-shim/process.go b/containerd-shim/process.go index c0abfc9..72505f4 100644 --- a/containerd-shim/process.go +++ b/containerd-shim/process.go @@ -206,26 +206,6 @@ func (p *process) create() error { return nil } -func (p *process) start() error { - cwd, err := os.Getwd() - if err != nil { - return err - } - logPath := filepath.Join(cwd, "log.json") - args := append([]string{ - "--log", logPath, - "--log-format", "json", - }, p.state.RuntimeArgs...) - args = append(args, "start", p.id) - cmd := exec.Command(p.runtime, args...) - cmd.Dir = p.bundle - cmd.Stdin = p.stdio.stdin - cmd.Stdout = p.stdio.stdout - cmd.Stderr = p.stdio.stderr - cmd.SysProcAttr = setPDeathSig() - return cmd.Run() -} - func (p *process) pid() int { return p.containerPid } diff --git a/integration-test/start_test.go b/integration-test/start_test.go index 5651594..a1c7c22 100644 --- a/integration-test/start_test.go +++ b/integration-test/start_test.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "os" "path/filepath" "syscall" "time" @@ -326,6 +327,9 @@ func (cs *ContainerdSuite) TestOOM(t *check.C) { spec.Linux.Resources.Memory = &ocs.Memory{ Limit: &limit, } + if swapEnabled() { + spec.Linux.Resources.Memory.Swap = &limit + } }); err != nil { t.Fatal(err) } @@ -362,7 +366,7 @@ func (cs *ContainerdSuite) TestOOM(t *check.C) { evt.Timestamp = e.Timestamp t.Assert(*e, checker.Equals, evt) case <-time.After(60 * time.Second): - t.Fatal("Container took more than 10 seconds to terminate") + t.Fatalf("Container took more than 60 seconds to %s", evt.Type) } } } @@ -479,3 +483,8 @@ func (cs *ContainerdSuite) TestRestart(t *check.C) { t.Assert(containers[i].Status, checker.Equals, "running") } } + +func swapEnabled() bool { + _, err := os.Stat("/sys/fs/cgroup/memory/memory.memsw.limit_in_bytes") + return err == nil +} diff --git a/runtime/process.go b/runtime/process.go index ce7a1c3..cfcb659 100644 --- a/runtime/process.go +++ b/runtime/process.go @@ -260,8 +260,33 @@ func (p *process) Signal(s os.Signal) error { // This should only be called on the process with ID "init" func (p *process) Start() error { if p.ID() == InitProcessID { - _, err := fmt.Fprintf(p.controlPipe, "%d %d %d\n", 2, 0, 0) - return err + var ( + errC = make(chan error, 1) + shimExit = make(chan struct{}, 1) + args = append(p.container.runtimeArgs, "start", p.container.id) + cmd = exec.Command(p.container.runtime, args...) + ) + go func() { + out, err := cmd.CombinedOutput() + if err != nil { + errC <- fmt.Errorf("%s: %q", err.Error(), out) + } + errC <- nil + }() + go func() { + p.Wait() + close(shimExit) + }() + select { + case err := <-errC: + if err != nil { + return err + } + case <-shimExit: + cmd.Process.Kill() + cmd.Wait() + return ErrShimExited + } } return nil } diff --git a/runtime/runtime.go b/runtime/runtime.go index 88044eb..f1f7504 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -20,6 +20,9 @@ var ( // ErrContainerStartTimeout is returned if a container takes too // long to start ErrContainerStartTimeout = errors.New("containerd: container did not start before the specified timeout") + // ErrShimExited is returned if the shim or the contianer's init process + // exits before completing + ErrShimExited = errors.New("containerd: shim exited before container process was started") errNoPidFile = errors.New("containerd: no process pid file found") errInvalidPidInt = errors.New("containerd: process pid is invalid") diff --git a/supervisor/worker.go b/supervisor/worker.go index 5c242ca..e226c21 100644 --- a/supervisor/worker.go +++ b/supervisor/worker.go @@ -62,9 +62,23 @@ func (w *worker) Start() { } if err := w.s.monitorProcess(process); err != nil { logrus.WithField("error", err).Error("containerd: add process to monitor") + t.Err <- err + evt := &DeleteTask{ + ID: t.Container.ID(), + NoEvent: true, + } + w.s.SendTask(evt) + continue } if err := process.Start(); err != nil { logrus.WithField("error", err).Error("containerd: start init process") + t.Err <- err + evt := &DeleteTask{ + ID: t.Container.ID(), + NoEvent: true, + } + w.s.SendTask(evt) + continue } ContainerStartTimer.UpdateSince(started) t.Err <- nil