Merge pull request #583 from alexlarsson/conmon-reap-zombies
conmon: Don't leave zombies and fix cgroup race
This commit is contained in:
commit
88037b143b
3 changed files with 121 additions and 82 deletions
|
@ -677,8 +677,10 @@ static gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition,
|
|||
|
||||
ninfo("about to accept from csfd: %d", fd);
|
||||
connfd = accept4(fd, NULL, NULL, SOCK_CLOEXEC);
|
||||
if (connfd < 0)
|
||||
pexit("Failed to accept console-socket connection");
|
||||
if (connfd < 0) {
|
||||
nwarn("Failed to accept console-socket connection");
|
||||
return G_SOURCE_CONTINUE;
|
||||
}
|
||||
|
||||
/* Not accepting anything else. */
|
||||
close(fd);
|
||||
|
@ -704,7 +706,6 @@ static gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition,
|
|||
* stdout. stderr is ignored. */
|
||||
masterfd_stdin = console.fd;
|
||||
masterfd_stdout = console.fd;
|
||||
masterfd_stderr = -1;
|
||||
|
||||
/* Clean up everything */
|
||||
close(connfd);
|
||||
|
@ -730,7 +731,7 @@ int main(int argc, char *argv[])
|
|||
_cleanup_free_ char *contents;
|
||||
int cpid = -1;
|
||||
int status;
|
||||
pid_t pid, create_pid;
|
||||
pid_t pid, main_pid, create_pid;
|
||||
_cleanup_close_ int epfd = -1;
|
||||
_cleanup_close_ int csfd = -1;
|
||||
/* Used for !terminal cases. */
|
||||
|
@ -741,11 +742,15 @@ int main(int argc, char *argv[])
|
|||
char buf[BUF_SIZE];
|
||||
int num_read;
|
||||
int sync_pipe_fd = -1;
|
||||
char *sync_pipe, *endptr;
|
||||
int start_pipe_fd = -1;
|
||||
char *start_pipe, *sync_pipe, *endptr;
|
||||
int len;
|
||||
GError *error = NULL;
|
||||
GOptionContext *context;
|
||||
GPtrArray *runtime_argv = NULL;
|
||||
_cleanup_close_ int dev_null_r = -1;
|
||||
_cleanup_close_ int dev_null_w = -1;
|
||||
int fds[2];
|
||||
|
||||
_cleanup_free_ char *memory_cgroup_path = NULL;
|
||||
int wb;
|
||||
|
@ -776,6 +781,14 @@ int main(int argc, char *argv[])
|
|||
bundle_path = cwd;
|
||||
}
|
||||
|
||||
dev_null_r = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||
if (dev_null_r < 0)
|
||||
pexit("Failed to open /dev/null");
|
||||
|
||||
dev_null_w = open("/dev/null", O_WRONLY | O_CLOEXEC);
|
||||
if (dev_null_w < 0)
|
||||
pexit("Failed to open /dev/null");
|
||||
|
||||
if (exec && exec_process_spec == NULL) {
|
||||
nexit("Exec process spec path not provided. Use --exec-process-spec");
|
||||
}
|
||||
|
@ -791,6 +804,44 @@ int main(int argc, char *argv[])
|
|||
if (log_path == NULL)
|
||||
nexit("Log file path not provided. Use --log-path");
|
||||
|
||||
start_pipe = getenv("_OCI_STARTPIPE");
|
||||
if (start_pipe) {
|
||||
errno = 0;
|
||||
start_pipe_fd = strtol(start_pipe, &endptr, 10);
|
||||
if (errno != 0 || *endptr != '\0')
|
||||
pexit("unable to parse _OCI_STARTPIPE");
|
||||
/* Block for an initial write to the start pipe before
|
||||
spawning any childred or exiting, to ensure the
|
||||
parent can put us in the right cgroup. */
|
||||
read(start_pipe_fd, buf, BUF_SIZE);
|
||||
close(start_pipe_fd);
|
||||
}
|
||||
|
||||
if (!exec) {
|
||||
/* In the create-container case we double-fork in
|
||||
order to disconnect from the parent, as we want to
|
||||
continue in a daemon-like way */
|
||||
main_pid = fork();
|
||||
if (main_pid < 0) {
|
||||
pexit("Failed to fork the create command");
|
||||
} else if (main_pid != 0) {
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Disconnect stdio from parent. We need to do this, because
|
||||
the parent is waiting for the stdout to end when the intermediate
|
||||
child dies */
|
||||
if (dup2(dev_null_r, STDIN_FILENO) < 0)
|
||||
pexit("Failed to dup over stdout");
|
||||
if (dup2(dev_null_w, STDOUT_FILENO) < 0)
|
||||
pexit("Failed to dup over stdout");
|
||||
if (dup2(dev_null_w, STDERR_FILENO) < 0)
|
||||
pexit("Failed to dup over stderr");
|
||||
|
||||
/* Create a new session group */
|
||||
setsid();
|
||||
}
|
||||
|
||||
/* Environment variables */
|
||||
sync_pipe = getenv("_OCI_SYNCPIPE");
|
||||
if (sync_pipe) {
|
||||
|
@ -848,7 +899,6 @@ int main(int argc, char *argv[])
|
|||
if (listen(csfd, 128) < 0)
|
||||
pexit("Failed to listen on console-socket");
|
||||
} else {
|
||||
int fds[2];
|
||||
|
||||
/*
|
||||
* Create a "fake" master fd so that we can use the same epoll code in
|
||||
|
@ -873,13 +923,15 @@ int main(int argc, char *argv[])
|
|||
|
||||
masterfd_stdout = fds[0];
|
||||
slavefd_stdout = fds[1];
|
||||
}
|
||||
|
||||
/* We always create a stderr pipe, because that way we can capture
|
||||
runc stderr messages before the tty is created */
|
||||
if (pipe2(fds, O_CLOEXEC) < 0)
|
||||
pexit("Failed to create !terminal stderr pipe");
|
||||
pexit("Failed to create stderr pipe");
|
||||
|
||||
masterfd_stderr = fds[0];
|
||||
slavefd_stderr = fds[1];
|
||||
}
|
||||
|
||||
runtime_argv = g_ptr_array_new();
|
||||
g_ptr_array_add(runtime_argv, runtime_path);
|
||||
|
@ -929,25 +981,21 @@ int main(int argc, char *argv[])
|
|||
if (create_pid < 0) {
|
||||
pexit("Failed to fork the create command");
|
||||
} else if (!create_pid) {
|
||||
_cleanup_close_ int dev_null = -1;
|
||||
/* FIXME: This results in us not outputting runc error messages to crio's log. */
|
||||
if (slavefd_stdin < 0) {
|
||||
dev_null = open("/dev/null", O_RDONLY);
|
||||
if (dev_null < 0)
|
||||
pexit("Failed to open /dev/null");
|
||||
slavefd_stdin = dev_null;
|
||||
}
|
||||
if (slavefd_stdin < 0)
|
||||
slavefd_stdin = dev_null_r;
|
||||
if (dup2(slavefd_stdin, STDIN_FILENO) < 0)
|
||||
pexit("Failed to dup over stdout");
|
||||
|
||||
if (slavefd_stdout >= 0) {
|
||||
if (slavefd_stdout < 0)
|
||||
slavefd_stdout = dev_null_w;
|
||||
if (dup2(slavefd_stdout, STDOUT_FILENO) < 0)
|
||||
pexit("Failed to dup over stdout");
|
||||
}
|
||||
if (slavefd_stderr >= 0) {
|
||||
|
||||
if (slavefd_stderr < 0)
|
||||
slavefd_stderr = slavefd_stdout;
|
||||
if (dup2(slavefd_stderr, STDERR_FILENO) < 0)
|
||||
pexit("Failed to dup over stderr");
|
||||
}
|
||||
|
||||
execv(g_ptr_array_index(runtime_argv,0), (char **)runtime_argv->pdata);
|
||||
exit(127);
|
||||
|
@ -978,16 +1026,6 @@ int main(int argc, char *argv[])
|
|||
|
||||
if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) {
|
||||
if (sync_pipe_fd > 0 && !exec) {
|
||||
if (terminal) {
|
||||
/*
|
||||
* For this case, the stderr is captured in the parent when terminal is passed down.
|
||||
* We send -1 as pid to signal to parent that create container has failed.
|
||||
*/
|
||||
len = snprintf(buf, BUF_SIZE, "{\"pid\": %d}\n", -1);
|
||||
if (len < 0 || write_all(sync_pipe_fd, buf, len) != len) {
|
||||
pexit("unable to send container pid to parent");
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Read from container stderr for any error and send it to parent
|
||||
* We send -1 as pid to signal to parent that create container has failed.
|
||||
|
@ -1006,12 +1044,11 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status));
|
||||
}
|
||||
|
||||
if (terminal && masterfd_stdout == -1)
|
||||
pexit("Runtime did not set up terminal");
|
||||
nexit("Runtime did not set up terminal");
|
||||
|
||||
/* Read the pid so we can wait for the process to exit */
|
||||
g_file_get_contents(pid_file, &contents, NULL, &err);
|
||||
|
|
48
oci/oci.go
48
oci/oci.go
|
@ -128,10 +128,12 @@ func getOCIVersion(name string, args ...string) (string, error) {
|
|||
func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||
var stderrBuf bytes.Buffer
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
childStartPipe, parentStartPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating socket pair: %v", err)
|
||||
}
|
||||
defer parentPipe.Close()
|
||||
defer parentStartPipe.Close()
|
||||
|
||||
var args []string
|
||||
if r.cgroupManager == "systemd" {
|
||||
|
@ -163,9 +165,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
if c.terminal {
|
||||
cmd.Stderr = &stderrBuf
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
|
||||
// 0, 1 and 2 are stdin, stdout and stderr
|
||||
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
|
||||
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
|
@ -175,6 +178,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
|
||||
// We don't need childPipe on the parent side
|
||||
childPipe.Close()
|
||||
childStartPipe.Close()
|
||||
|
||||
// Move conmon to specified cgroup
|
||||
if cgroupParent != "" {
|
||||
|
@ -186,6 +190,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
}
|
||||
}
|
||||
|
||||
/* We set the cgroup, now the child can start creating children */
|
||||
someData := []byte{0}
|
||||
_, err = parentStartPipe.Write(someData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
/* Wait for initial setup and fork, and reap child */
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait to get container pid from conmon
|
||||
type syncStruct struct {
|
||||
si *syncInfo
|
||||
|
@ -207,21 +224,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
return fmt.Errorf("error reading container (probably exited) json message: %v", ss.err)
|
||||
}
|
||||
logrus.Debugf("Received container pid: %d", ss.si.Pid)
|
||||
errorMessage := ""
|
||||
if c.terminal {
|
||||
errorMessage = stderrBuf.String()
|
||||
fmt.Fprintf(os.Stderr, errorMessage)
|
||||
errorMessage = sanitizeConmonErrorMessage(errorMessage)
|
||||
} else {
|
||||
if ss.si.Message != "" {
|
||||
errorMessage = ss.si.Message
|
||||
}
|
||||
}
|
||||
|
||||
if ss.si.Pid == -1 {
|
||||
if errorMessage != "" {
|
||||
logrus.Debugf("Container creation error: %s", errorMessage)
|
||||
return fmt.Errorf("container create failed: %s", errorMessage)
|
||||
if ss.si.Message != "" {
|
||||
logrus.Debugf("Container creation error: %s", ss.si.Message)
|
||||
return fmt.Errorf("container create failed: %s", ss.si.Message)
|
||||
}
|
||||
logrus.Debugf("Container creation failed")
|
||||
return fmt.Errorf("container create failed")
|
||||
|
@ -232,18 +238,6 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// sanitizeConmonErrorMessage removes conmon debug messages from error string
|
||||
func sanitizeConmonErrorMessage(errString string) string {
|
||||
var sanitizedLines []string
|
||||
lines := strings.Split(errString, "\n")
|
||||
for _, line := range lines {
|
||||
if !strings.HasPrefix(line, "[conmon") {
|
||||
sanitizedLines = append(sanitizedLines, line)
|
||||
}
|
||||
}
|
||||
return strings.Join(sanitizedLines, "\n")
|
||||
}
|
||||
|
||||
func createUnitName(prefix string, name string) string {
|
||||
return fmt.Sprintf("%s-%s.scope", prefix, name)
|
||||
}
|
||||
|
|
|
@ -69,10 +69,18 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
|||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
properties = append(properties, newProp("DefaultDependencies", false))
|
||||
_, err = conn.StartTransientUnit(unitName, "replace", properties, nil)
|
||||
ch := make(chan string)
|
||||
_, err = conn.StartTransientUnit(unitName, "replace", properties, ch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Block until job is started
|
||||
<-ch
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
|
|
Loading…
Reference in a new issue