Merge pull request #583 from alexlarsson/conmon-reap-zombies
conmon: Don't leave zombies and fix cgroup race
This commit is contained in:
commit
88037b143b
3 changed files with 121 additions and 82 deletions
|
@ -677,8 +677,10 @@ static gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition,
|
||||||
|
|
||||||
ninfo("about to accept from csfd: %d", fd);
|
ninfo("about to accept from csfd: %d", fd);
|
||||||
connfd = accept4(fd, NULL, NULL, SOCK_CLOEXEC);
|
connfd = accept4(fd, NULL, NULL, SOCK_CLOEXEC);
|
||||||
if (connfd < 0)
|
if (connfd < 0) {
|
||||||
pexit("Failed to accept console-socket connection");
|
nwarn("Failed to accept console-socket connection");
|
||||||
|
return G_SOURCE_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
/* Not accepting anything else. */
|
/* Not accepting anything else. */
|
||||||
close(fd);
|
close(fd);
|
||||||
|
@ -704,7 +706,6 @@ static gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition,
|
||||||
* stdout. stderr is ignored. */
|
* stdout. stderr is ignored. */
|
||||||
masterfd_stdin = console.fd;
|
masterfd_stdin = console.fd;
|
||||||
masterfd_stdout = console.fd;
|
masterfd_stdout = console.fd;
|
||||||
masterfd_stderr = -1;
|
|
||||||
|
|
||||||
/* Clean up everything */
|
/* Clean up everything */
|
||||||
close(connfd);
|
close(connfd);
|
||||||
|
@ -730,7 +731,7 @@ int main(int argc, char *argv[])
|
||||||
_cleanup_free_ char *contents;
|
_cleanup_free_ char *contents;
|
||||||
int cpid = -1;
|
int cpid = -1;
|
||||||
int status;
|
int status;
|
||||||
pid_t pid, create_pid;
|
pid_t pid, main_pid, create_pid;
|
||||||
_cleanup_close_ int epfd = -1;
|
_cleanup_close_ int epfd = -1;
|
||||||
_cleanup_close_ int csfd = -1;
|
_cleanup_close_ int csfd = -1;
|
||||||
/* Used for !terminal cases. */
|
/* Used for !terminal cases. */
|
||||||
|
@ -741,11 +742,15 @@ int main(int argc, char *argv[])
|
||||||
char buf[BUF_SIZE];
|
char buf[BUF_SIZE];
|
||||||
int num_read;
|
int num_read;
|
||||||
int sync_pipe_fd = -1;
|
int sync_pipe_fd = -1;
|
||||||
char *sync_pipe, *endptr;
|
int start_pipe_fd = -1;
|
||||||
|
char *start_pipe, *sync_pipe, *endptr;
|
||||||
int len;
|
int len;
|
||||||
GError *error = NULL;
|
GError *error = NULL;
|
||||||
GOptionContext *context;
|
GOptionContext *context;
|
||||||
GPtrArray *runtime_argv = NULL;
|
GPtrArray *runtime_argv = NULL;
|
||||||
|
_cleanup_close_ int dev_null_r = -1;
|
||||||
|
_cleanup_close_ int dev_null_w = -1;
|
||||||
|
int fds[2];
|
||||||
|
|
||||||
_cleanup_free_ char *memory_cgroup_path = NULL;
|
_cleanup_free_ char *memory_cgroup_path = NULL;
|
||||||
int wb;
|
int wb;
|
||||||
|
@ -776,6 +781,14 @@ int main(int argc, char *argv[])
|
||||||
bundle_path = cwd;
|
bundle_path = cwd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dev_null_r = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||||
|
if (dev_null_r < 0)
|
||||||
|
pexit("Failed to open /dev/null");
|
||||||
|
|
||||||
|
dev_null_w = open("/dev/null", O_WRONLY | O_CLOEXEC);
|
||||||
|
if (dev_null_w < 0)
|
||||||
|
pexit("Failed to open /dev/null");
|
||||||
|
|
||||||
if (exec && exec_process_spec == NULL) {
|
if (exec && exec_process_spec == NULL) {
|
||||||
nexit("Exec process spec path not provided. Use --exec-process-spec");
|
nexit("Exec process spec path not provided. Use --exec-process-spec");
|
||||||
}
|
}
|
||||||
|
@ -791,6 +804,44 @@ int main(int argc, char *argv[])
|
||||||
if (log_path == NULL)
|
if (log_path == NULL)
|
||||||
nexit("Log file path not provided. Use --log-path");
|
nexit("Log file path not provided. Use --log-path");
|
||||||
|
|
||||||
|
start_pipe = getenv("_OCI_STARTPIPE");
|
||||||
|
if (start_pipe) {
|
||||||
|
errno = 0;
|
||||||
|
start_pipe_fd = strtol(start_pipe, &endptr, 10);
|
||||||
|
if (errno != 0 || *endptr != '\0')
|
||||||
|
pexit("unable to parse _OCI_STARTPIPE");
|
||||||
|
/* Block for an initial write to the start pipe before
|
||||||
|
spawning any childred or exiting, to ensure the
|
||||||
|
parent can put us in the right cgroup. */
|
||||||
|
read(start_pipe_fd, buf, BUF_SIZE);
|
||||||
|
close(start_pipe_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!exec) {
|
||||||
|
/* In the create-container case we double-fork in
|
||||||
|
order to disconnect from the parent, as we want to
|
||||||
|
continue in a daemon-like way */
|
||||||
|
main_pid = fork();
|
||||||
|
if (main_pid < 0) {
|
||||||
|
pexit("Failed to fork the create command");
|
||||||
|
} else if (main_pid != 0) {
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Disconnect stdio from parent. We need to do this, because
|
||||||
|
the parent is waiting for the stdout to end when the intermediate
|
||||||
|
child dies */
|
||||||
|
if (dup2(dev_null_r, STDIN_FILENO) < 0)
|
||||||
|
pexit("Failed to dup over stdout");
|
||||||
|
if (dup2(dev_null_w, STDOUT_FILENO) < 0)
|
||||||
|
pexit("Failed to dup over stdout");
|
||||||
|
if (dup2(dev_null_w, STDERR_FILENO) < 0)
|
||||||
|
pexit("Failed to dup over stderr");
|
||||||
|
|
||||||
|
/* Create a new session group */
|
||||||
|
setsid();
|
||||||
|
}
|
||||||
|
|
||||||
/* Environment variables */
|
/* Environment variables */
|
||||||
sync_pipe = getenv("_OCI_SYNCPIPE");
|
sync_pipe = getenv("_OCI_SYNCPIPE");
|
||||||
if (sync_pipe) {
|
if (sync_pipe) {
|
||||||
|
@ -848,7 +899,6 @@ int main(int argc, char *argv[])
|
||||||
if (listen(csfd, 128) < 0)
|
if (listen(csfd, 128) < 0)
|
||||||
pexit("Failed to listen on console-socket");
|
pexit("Failed to listen on console-socket");
|
||||||
} else {
|
} else {
|
||||||
int fds[2];
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a "fake" master fd so that we can use the same epoll code in
|
* Create a "fake" master fd so that we can use the same epoll code in
|
||||||
|
@ -873,13 +923,15 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
masterfd_stdout = fds[0];
|
masterfd_stdout = fds[0];
|
||||||
slavefd_stdout = fds[1];
|
slavefd_stdout = fds[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We always create a stderr pipe, because that way we can capture
|
||||||
|
runc stderr messages before the tty is created */
|
||||||
if (pipe2(fds, O_CLOEXEC) < 0)
|
if (pipe2(fds, O_CLOEXEC) < 0)
|
||||||
pexit("Failed to create !terminal stderr pipe");
|
pexit("Failed to create stderr pipe");
|
||||||
|
|
||||||
masterfd_stderr = fds[0];
|
masterfd_stderr = fds[0];
|
||||||
slavefd_stderr = fds[1];
|
slavefd_stderr = fds[1];
|
||||||
}
|
|
||||||
|
|
||||||
runtime_argv = g_ptr_array_new();
|
runtime_argv = g_ptr_array_new();
|
||||||
g_ptr_array_add(runtime_argv, runtime_path);
|
g_ptr_array_add(runtime_argv, runtime_path);
|
||||||
|
@ -929,25 +981,21 @@ int main(int argc, char *argv[])
|
||||||
if (create_pid < 0) {
|
if (create_pid < 0) {
|
||||||
pexit("Failed to fork the create command");
|
pexit("Failed to fork the create command");
|
||||||
} else if (!create_pid) {
|
} else if (!create_pid) {
|
||||||
_cleanup_close_ int dev_null = -1;
|
|
||||||
/* FIXME: This results in us not outputting runc error messages to crio's log. */
|
/* FIXME: This results in us not outputting runc error messages to crio's log. */
|
||||||
if (slavefd_stdin < 0) {
|
if (slavefd_stdin < 0)
|
||||||
dev_null = open("/dev/null", O_RDONLY);
|
slavefd_stdin = dev_null_r;
|
||||||
if (dev_null < 0)
|
|
||||||
pexit("Failed to open /dev/null");
|
|
||||||
slavefd_stdin = dev_null;
|
|
||||||
}
|
|
||||||
if (dup2(slavefd_stdin, STDIN_FILENO) < 0)
|
if (dup2(slavefd_stdin, STDIN_FILENO) < 0)
|
||||||
pexit("Failed to dup over stdout");
|
pexit("Failed to dup over stdout");
|
||||||
|
|
||||||
if (slavefd_stdout >= 0) {
|
if (slavefd_stdout < 0)
|
||||||
|
slavefd_stdout = dev_null_w;
|
||||||
if (dup2(slavefd_stdout, STDOUT_FILENO) < 0)
|
if (dup2(slavefd_stdout, STDOUT_FILENO) < 0)
|
||||||
pexit("Failed to dup over stdout");
|
pexit("Failed to dup over stdout");
|
||||||
}
|
|
||||||
if (slavefd_stderr >= 0) {
|
if (slavefd_stderr < 0)
|
||||||
|
slavefd_stderr = slavefd_stdout;
|
||||||
if (dup2(slavefd_stderr, STDERR_FILENO) < 0)
|
if (dup2(slavefd_stderr, STDERR_FILENO) < 0)
|
||||||
pexit("Failed to dup over stderr");
|
pexit("Failed to dup over stderr");
|
||||||
}
|
|
||||||
|
|
||||||
execv(g_ptr_array_index(runtime_argv,0), (char **)runtime_argv->pdata);
|
execv(g_ptr_array_index(runtime_argv,0), (char **)runtime_argv->pdata);
|
||||||
exit(127);
|
exit(127);
|
||||||
|
@ -978,16 +1026,6 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) {
|
if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) {
|
||||||
if (sync_pipe_fd > 0 && !exec) {
|
if (sync_pipe_fd > 0 && !exec) {
|
||||||
if (terminal) {
|
|
||||||
/*
|
|
||||||
* For this case, the stderr is captured in the parent when terminal is passed down.
|
|
||||||
* We send -1 as pid to signal to parent that create container has failed.
|
|
||||||
*/
|
|
||||||
len = snprintf(buf, BUF_SIZE, "{\"pid\": %d}\n", -1);
|
|
||||||
if (len < 0 || write_all(sync_pipe_fd, buf, len) != len) {
|
|
||||||
pexit("unable to send container pid to parent");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/*
|
/*
|
||||||
* Read from container stderr for any error and send it to parent
|
* Read from container stderr for any error and send it to parent
|
||||||
* We send -1 as pid to signal to parent that create container has failed.
|
* We send -1 as pid to signal to parent that create container has failed.
|
||||||
|
@ -1006,12 +1044,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status));
|
nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (terminal && masterfd_stdout == -1)
|
if (terminal && masterfd_stdout == -1)
|
||||||
pexit("Runtime did not set up terminal");
|
nexit("Runtime did not set up terminal");
|
||||||
|
|
||||||
/* Read the pid so we can wait for the process to exit */
|
/* Read the pid so we can wait for the process to exit */
|
||||||
g_file_get_contents(pid_file, &contents, NULL, &err);
|
g_file_get_contents(pid_file, &contents, NULL, &err);
|
||||||
|
|
48
oci/oci.go
48
oci/oci.go
|
@ -128,10 +128,12 @@ func getOCIVersion(name string, args ...string) (string, error) {
|
||||||
func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
var stderrBuf bytes.Buffer
|
var stderrBuf bytes.Buffer
|
||||||
parentPipe, childPipe, err := newPipe()
|
parentPipe, childPipe, err := newPipe()
|
||||||
|
childStartPipe, parentStartPipe, err := newPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error creating socket pair: %v", err)
|
return fmt.Errorf("error creating socket pair: %v", err)
|
||||||
}
|
}
|
||||||
defer parentPipe.Close()
|
defer parentPipe.Close()
|
||||||
|
defer parentStartPipe.Close()
|
||||||
|
|
||||||
var args []string
|
var args []string
|
||||||
if r.cgroupManager == "systemd" {
|
if r.cgroupManager == "systemd" {
|
||||||
|
@ -163,9 +165,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
if c.terminal {
|
if c.terminal {
|
||||||
cmd.Stderr = &stderrBuf
|
cmd.Stderr = &stderrBuf
|
||||||
}
|
}
|
||||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
|
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
|
||||||
// 0, 1 and 2 are stdin, stdout and stderr
|
// 0, 1 and 2 are stdin, stdout and stderr
|
||||||
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
|
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
|
||||||
|
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
|
||||||
|
|
||||||
err = cmd.Start()
|
err = cmd.Start()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -175,6 +178,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
|
|
||||||
// We don't need childPipe on the parent side
|
// We don't need childPipe on the parent side
|
||||||
childPipe.Close()
|
childPipe.Close()
|
||||||
|
childStartPipe.Close()
|
||||||
|
|
||||||
// Move conmon to specified cgroup
|
// Move conmon to specified cgroup
|
||||||
if cgroupParent != "" {
|
if cgroupParent != "" {
|
||||||
|
@ -186,6 +190,19 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We set the cgroup, now the child can start creating children */
|
||||||
|
someData := []byte{0}
|
||||||
|
_, err = parentStartPipe.Write(someData)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for initial setup and fork, and reap child */
|
||||||
|
err = cmd.Wait()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Wait to get container pid from conmon
|
// Wait to get container pid from conmon
|
||||||
type syncStruct struct {
|
type syncStruct struct {
|
||||||
si *syncInfo
|
si *syncInfo
|
||||||
|
@ -207,21 +224,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
return fmt.Errorf("error reading container (probably exited) json message: %v", ss.err)
|
return fmt.Errorf("error reading container (probably exited) json message: %v", ss.err)
|
||||||
}
|
}
|
||||||
logrus.Debugf("Received container pid: %d", ss.si.Pid)
|
logrus.Debugf("Received container pid: %d", ss.si.Pid)
|
||||||
errorMessage := ""
|
|
||||||
if c.terminal {
|
|
||||||
errorMessage = stderrBuf.String()
|
|
||||||
fmt.Fprintf(os.Stderr, errorMessage)
|
|
||||||
errorMessage = sanitizeConmonErrorMessage(errorMessage)
|
|
||||||
} else {
|
|
||||||
if ss.si.Message != "" {
|
|
||||||
errorMessage = ss.si.Message
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ss.si.Pid == -1 {
|
if ss.si.Pid == -1 {
|
||||||
if errorMessage != "" {
|
if ss.si.Message != "" {
|
||||||
logrus.Debugf("Container creation error: %s", errorMessage)
|
logrus.Debugf("Container creation error: %s", ss.si.Message)
|
||||||
return fmt.Errorf("container create failed: %s", errorMessage)
|
return fmt.Errorf("container create failed: %s", ss.si.Message)
|
||||||
}
|
}
|
||||||
logrus.Debugf("Container creation failed")
|
logrus.Debugf("Container creation failed")
|
||||||
return fmt.Errorf("container create failed")
|
return fmt.Errorf("container create failed")
|
||||||
|
@ -232,18 +238,6 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitizeConmonErrorMessage removes conmon debug messages from error string
|
|
||||||
func sanitizeConmonErrorMessage(errString string) string {
|
|
||||||
var sanitizedLines []string
|
|
||||||
lines := strings.Split(errString, "\n")
|
|
||||||
for _, line := range lines {
|
|
||||||
if !strings.HasPrefix(line, "[conmon") {
|
|
||||||
sanitizedLines = append(sanitizedLines, line)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return strings.Join(sanitizedLines, "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
func createUnitName(prefix string, name string) string {
|
func createUnitName(prefix string, name string) string {
|
||||||
return fmt.Sprintf("%s-%s.scope", prefix, name)
|
return fmt.Sprintf("%s-%s.scope", prefix, name)
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,8 +69,16 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
||||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||||
properties = append(properties, newProp("Delegate", true))
|
properties = append(properties, newProp("Delegate", true))
|
||||||
properties = append(properties, newProp("DefaultDependencies", false))
|
properties = append(properties, newProp("DefaultDependencies", false))
|
||||||
_, err = conn.StartTransientUnit(unitName, "replace", properties, nil)
|
ch := make(chan string)
|
||||||
|
_, err = conn.StartTransientUnit(unitName, "replace", properties, ch)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block until job is started
|
||||||
|
<-ch
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||||
|
|
Loading…
Reference in a new issue