Merge pull request #168 from docker/timeout

Add timeout flag for container start times
This commit is contained in:
Michael Crosby 2016-04-05 15:04:39 -07:00
commit 5e5daf2b33
6 changed files with 79 additions and 37 deletions

View file

@ -59,6 +59,11 @@ var daemonFlags = []cli.Flag{
Name: "pprof-address", Name: "pprof-address",
Usage: "http address to listen for pprof events", Usage: "http address to listen for pprof events",
}, },
cli.DurationFlag{
Name: "start-timeout",
Value: 15 * time.Second,
Usage: "timeout duration for waiting on a container to start before it is killed",
},
} }
func main() { func main() {
@ -81,6 +86,7 @@ func main() {
10, 10,
context.String("runtime"), context.String("runtime"),
context.StringSlice("runtime-args"), context.StringSlice("runtime-args"),
context.Duration("start-timeout"),
); err != nil { ); err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
@ -90,7 +96,7 @@ func main() {
} }
} }
func daemon(address, stateDir string, concurrency int, runtimeName string, runtimeArgs []string) error { func daemon(address, stateDir string, concurrency int, runtimeName string, runtimeArgs []string, timeout time.Duration) error {
// setup a standard reaper so that we don't leave any zombies if we are still alive // setup a standard reaper so that we don't leave any zombies if we are still alive
// this is just good practice because we are spawning new processes // this is just good practice because we are spawning new processes
s := make(chan os.Signal, 2048) s := make(chan os.Signal, 2048)
@ -98,7 +104,7 @@ func daemon(address, stateDir string, concurrency int, runtimeName string, runti
if err := osutils.SetSubreaper(1); err != nil { if err := osutils.SetSubreaper(1); err != nil {
logrus.WithField("error", err).Error("containerd: set subpreaper") logrus.WithField("error", err).Error("containerd: set subpreaper")
} }
sv, err := supervisor.New(stateDir, runtimeName, runtimeArgs) sv, err := supervisor.New(stateDir, runtimeName, runtimeArgs, timeout)
if err != nil { if err != nil {
return err return err
} }

View file

@ -7,6 +7,7 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"time"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/docker/containerd/specs" "github.com/docker/containerd/specs"
@ -90,6 +91,7 @@ type ContainerOpts struct {
RuntimeArgs []string RuntimeArgs []string
Labels []string Labels []string
NoPivotRoot bool NoPivotRoot bool
Timeout time.Duration
} }
// New returns a new container // New returns a new container
@ -103,6 +105,7 @@ func New(opts ContainerOpts) (Container, error) {
runtime: opts.Runtime, runtime: opts.Runtime,
runtimeArgs: opts.RuntimeArgs, runtimeArgs: opts.RuntimeArgs,
noPivotRoot: opts.NoPivotRoot, noPivotRoot: opts.NoPivotRoot,
timeout: opts.Timeout,
} }
if err := os.Mkdir(filepath.Join(c.root, c.id), 0755); err != nil { if err := os.Mkdir(filepath.Join(c.root, c.id), 0755); err != nil {
return nil, err return nil, err
@ -191,6 +194,7 @@ type container struct {
labels []string labels []string
oomFds []int oomFds []int
noPivotRoot bool noPivotRoot bool
timeout time.Duration
} }
func (c *container) ID() string { func (c *container) ID() string {
@ -223,8 +227,9 @@ func (c *container) Delete() error {
args := c.runtimeArgs args := c.runtimeArgs
args = append(args, "delete", c.id) args = append(args, "delete", c.id)
exec.Command(c.runtime, args...).Run() if derr := exec.Command(c.runtime, args...).Run(); err == nil {
err = derr
}
return err return err
} }

View file

@ -224,7 +224,7 @@ func (c *container) startCmd(pid string, cmd *exec.Cmd, p *process) error {
} }
return err return err
} }
if err := waitForStart(p, cmd); err != nil { if err := c.waitForStart(p, cmd); err != nil {
return err return err
} }
c.processes[pid] = p c.processes[pid] = p
@ -335,49 +335,76 @@ func (c *container) writeEventFD(root string, cfd, efd int) error {
return err return err
} }
func waitForStart(p *process, cmd *exec.Cmd) error { type waitArgs struct {
for i := 0; i < 300; i++ { pid int
if _, err := p.getPidFromFile(); err != nil { err error
if os.IsNotExist(err) || err == errInvalidPidInt { }
alive, err := isAlive(cmd)
if err != nil { func (c *container) waitForStart(p *process, cmd *exec.Cmd) error {
return err wc := make(chan error, 1)
} go func() {
if !alive { for {
// runc could have failed to run the container so lets get the error if _, err := p.getPidFromFile(); err != nil {
// out of the logs or the shim could have encountered an error if os.IsNotExist(err) || err == errInvalidPidInt {
messages, err := readLogMessages(filepath.Join(p.root, "shim-log.json")) alive, err := isAlive(cmd)
if err != nil { if err != nil {
return err wc <- err
return
} }
for _, m := range messages { if !alive {
if m.Level == "error" { // runc could have failed to run the container so lets get the error
return fmt.Errorf("shim error: %v", m.Msg) // out of the logs or the shim could have encountered an error
messages, err := readLogMessages(filepath.Join(p.root, "shim-log.json"))
if err != nil {
wc <- err
return
} }
} for _, m := range messages {
// no errors reported back from shim, check for runc/runtime errors if m.Level == "error" {
messages, err = readLogMessages(filepath.Join(p.root, "log.json")) wc <- fmt.Errorf("shim error: %v", m.Msg)
if err != nil { return
if os.IsNotExist(err) { }
return ErrContainerNotStarted
} }
return err // no errors reported back from shim, check for runc/runtime errors
} messages, err = readLogMessages(filepath.Join(p.root, "log.json"))
for _, m := range messages { if err != nil {
if m.Level == "error" { if os.IsNotExist(err) {
return fmt.Errorf("oci runtime error: %v", m.Msg) err = ErrContainerNotStarted
}
wc <- err
return
} }
for _, m := range messages {
if m.Level == "error" {
wc <- fmt.Errorf("oci runtime error: %v", m.Msg)
return
}
}
wc <- ErrContainerNotStarted
return
} }
return ErrContainerNotStarted time.Sleep(15 * time.Millisecond)
continue
} }
time.Sleep(50 * time.Millisecond) wc <- err
continue return
} }
// the pid file was read successfully
wc <- nil
return
}
}()
select {
case err := <-wc:
if err != nil {
return err return err
} }
return nil return nil
case <-time.After(c.timeout):
cmd.Process.Kill()
cmd.Wait()
return ErrContainerStartTimeout
} }
return errNoPidFile
} }
// isAlive checks if the shim that launched the container is still alive // isAlive checks if the shim that launched the container is still alive

View file

@ -17,6 +17,7 @@ var (
ErrProcessNotExited = errors.New("containerd: process has not exited") ErrProcessNotExited = errors.New("containerd: process has not exited")
ErrProcessExited = errors.New("containerd: process has exited") ErrProcessExited = errors.New("containerd: process has exited")
ErrContainerNotStarted = errors.New("containerd: container not started") ErrContainerNotStarted = errors.New("containerd: container not started")
ErrContainerStartTimeout = errors.New("containerd: container did not start before the specified timeout")
errNoPidFile = errors.New("containerd: no process pid file found") errNoPidFile = errors.New("containerd: no process pid file found")
errInvalidPidInt = errors.New("containerd: process pid is invalid") errInvalidPidInt = errors.New("containerd: process pid is invalid")

View file

@ -29,6 +29,7 @@ func (s *Supervisor) start(t *StartTask) error {
RuntimeArgs: s.runtimeArgs, RuntimeArgs: s.runtimeArgs,
Labels: t.Labels, Labels: t.Labels,
NoPivotRoot: t.NoPivotRoot, NoPivotRoot: t.NoPivotRoot,
Timeout: s.timeout,
}) })
if err != nil { if err != nil {
return err return err

View file

@ -18,7 +18,7 @@ const (
) )
// New returns an initialized Process supervisor. // New returns an initialized Process supervisor.
func New(stateDir string, runtimeName string, runtimeArgs []string) (*Supervisor, error) { func New(stateDir string, runtimeName string, runtimeArgs []string, timeout time.Duration) (*Supervisor, error) {
startTasks := make(chan *startTask, 10) startTasks := make(chan *startTask, 10)
if err := os.MkdirAll(stateDir, 0755); err != nil { if err := os.MkdirAll(stateDir, 0755); err != nil {
return nil, err return nil, err
@ -41,6 +41,7 @@ func New(stateDir string, runtimeName string, runtimeArgs []string) (*Supervisor
monitor: monitor, monitor: monitor,
runtime: runtimeName, runtime: runtimeName,
runtimeArgs: runtimeArgs, runtimeArgs: runtimeArgs,
timeout: timeout,
} }
if err := setupEventLog(s); err != nil { if err := setupEventLog(s); err != nil {
return nil, err return nil, err
@ -118,6 +119,7 @@ type Supervisor struct {
tasks chan Task tasks chan Task
monitor *Monitor monitor *Monitor
eventLog []Event eventLog []Event
timeout time.Duration
} }
// Stop closes all startTasks and sends a SIGTERM to each container's pid1 then waits for them to // Stop closes all startTasks and sends a SIGTERM to each container's pid1 then waits for them to