conmon: Don't leave zombies and fix cgroup race

Currently, when creating containers we never call Wait on the
conmon exec.Command, which means that the child hangs around
forever as a zombie after it dies.

However, instead of doing this waitpid() in the parent we instead
do a double-fork in conmon, to daemonize it. That makes a lot of
sense, as conmon really is not tied to the launcher, but needs
to outlive it if e.g. the cri-o daemon restarts.

However, this makes even more obvious a race condition which we
already have. When crio-d puts the conmon pid in a cgroup there
is a race where conmon could already have spawned a child, and
it would then not be part of the cgroup. In order to fix this
we add another synchronization pipe to conmon, which we block
on before we create any children. The parent then makes sure the
pid is in the cgroup before letting it continue.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
This commit is contained in:
Alexander Larsson 2017-06-09 16:18:27 +02:00
parent 7b9032bac7
commit af4fbcd942
2 changed files with 80 additions and 18 deletions

View file

@ -104,10 +104,12 @@ func getOCIVersion(name string, args ...string) (string, error) {
func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
var stderrBuf bytes.Buffer
parentPipe, childPipe, err := newPipe()
childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return fmt.Errorf("error creating socket pair: %v", err)
}
defer parentPipe.Close()
defer parentStartPipe.Close()
var args []string
if r.cgroupManager == "systemd" {
@ -139,9 +141,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
if c.terminal {
cmd.Stderr = &stderrBuf
}
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
// 0, 1 and 2 are stdin, stdout and stderr
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
err = cmd.Start()
if err != nil {
@ -151,6 +154,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
// We don't need childPipe on the parent side
childPipe.Close()
childStartPipe.Close()
// Move conmon to specified cgroup
if cgroupParent != "" {
@ -162,6 +166,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
}
}
/* We set the cgroup, now the child can start creating children */
someData := []byte{0}
_, err = parentStartPipe.Write(someData)
if err != nil {
return err
}
/* Wait for initial setup and fork, and reap child */
err = cmd.Wait()
if err != nil {
return err
}
// Wait to get container pid from conmon
type syncStruct struct {
si *syncInfo