conmon: Don't leave zombies and fix cgroup race
Currently, when creating containers we never call Wait on the conmon exec.Command, which means that the child hangs around forever as a zombie after it dies. However, rather than doing this waitpid() in the parent, we do a double-fork in conmon to daemonize it. That makes a lot of sense, as conmon is not really tied to the launcher and needs to outlive it if, e.g., the cri-o daemon restarts. However, this makes a race condition that we already have even more obvious: when crio-d puts the conmon pid in a cgroup, conmon could already have spawned a child, which would then not be part of the cgroup. To fix this we add another synchronization pipe to conmon, which conmon blocks on before it creates any children. The parent then makes sure the pid is in the cgroup before letting conmon continue. Signed-off-by: Alexander Larsson <alexl@redhat.com>
This commit is contained in:
parent
7b9032bac7
commit
af4fbcd942
2 changed files with 80 additions and 18 deletions
19
oci/oci.go
19
oci/oci.go
|
@ -104,10 +104,12 @@ func getOCIVersion(name string, args ...string) (string, error) {
|
|||
func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||
var stderrBuf bytes.Buffer
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
childStartPipe, parentStartPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating socket pair: %v", err)
|
||||
}
|
||||
defer parentPipe.Close()
|
||||
defer parentStartPipe.Close()
|
||||
|
||||
var args []string
|
||||
if r.cgroupManager == "systemd" {
|
||||
|
@ -139,9 +141,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
if c.terminal {
|
||||
cmd.Stderr = &stderrBuf
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
|
||||
// 0, 1 and 2 are stdin, stdout and stderr
|
||||
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
|
||||
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
|
@ -151,6 +154,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
|
||||
// We don't need childPipe on the parent side
|
||||
childPipe.Close()
|
||||
childStartPipe.Close()
|
||||
|
||||
// Move conmon to specified cgroup
|
||||
if cgroupParent != "" {
|
||||
|
@ -162,6 +166,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
}
|
||||
}
|
||||
|
||||
/* We set the cgroup, now the child can start creating children */
|
||||
someData := []byte{0}
|
||||
_, err = parentStartPipe.Write(someData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
/* Wait for initial setup and fork, and reap child */
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait to get container pid from conmon
|
||||
type syncStruct struct {
|
||||
si *syncInfo
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue