conmon: use waitpid to wait for terminated processes

During my testing in OpenShift I've noticed that conmon leaves some
zombies processes.  The reason is that we are using
PR_SET_CHILD_SUBREAPER in conmon and runC forks a new process (runc
init) each time we start a container.  Using g_child_watch_add only on
the main runc process and on the container process is not enough as we
do not cleanup any other zombie process.

Since glib doesn't allow to catch SIGCHLD and to better integrate in the
existing code, catch it with signal(2) then raise a SIGUSR1 that glib
handles.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2017-07-22 17:17:14 +02:00
parent 55310f9a95
commit 595b0557f3

View file

@ -550,6 +550,45 @@ static bool read_stdio(int fd, stdpipe_t pipe, bool *eof)
}
}
static void on_sigchld(G_GNUC_UNUSED int signal)
{
raise (SIGUSR1);
}
static void check_child_processes(GHashTable *pid_to_handler)
{
void (*cb) (GPid, int, gpointer);
for (;;) {
int status;
pid_t pid = waitpid(-1, &status, WNOHANG);
if (pid < 0 && errno == EINTR)
continue;
if (pid < 0 && errno == ECHILD) {
g_main_loop_quit (main_loop);
return;
}
if (pid < 0)
pexit("Failed to read child process status");
if (pid == 0)
return;
/* If we got here, pid > 0, so we have a valid pid to check. */
cb = g_hash_table_lookup(pid_to_handler, &pid);
if (cb)
cb(pid, status, 0);
}
}
static gboolean on_sigusr1_cb(gpointer user_data)
{
GHashTable *pid_to_handler = (GHashTable *) user_data;
check_child_processes (pid_to_handler);
return G_SOURCE_CONTINUE;
}
static gboolean stdio_cb(int fd, GIOCondition condition, gpointer user_data)
{
stdpipe_t pipe = GPOINTER_TO_INT(user_data);
@ -1221,10 +1260,24 @@ int main(int argc, char *argv[])
close(slavefd_stdout);
close(slavefd_stderr);
/* Map pid to its handler. */
GHashTable *pid_to_handler = g_hash_table_new (g_int_hash, g_int_equal);
g_hash_table_insert (pid_to_handler, &create_pid, runtime_exit_cb);
/*
* Glib does not support SIGCHLD so use SIGUSR1 with the same semantic. We will
* catch SIGCHLD and raise(SIGUSR1) in the signal handler.
*/
g_unix_signal_add (SIGUSR1, on_sigusr1_cb, pid_to_handler);
if (signal(SIGCHLD, on_sigchld) == SIG_ERR)
pexit("Failed to set handler for SIGCHLD");
ninfo("about to waitpid: %d", create_pid);
if (csname != NULL) {
guint terminal_watch = g_unix_fd_add (console_socket_fd, G_IO_IN, terminal_accept_cb, csname);
g_child_watch_add (create_pid, runtime_exit_cb, NULL);
/* Process any SIGCHLD we may have missed before the signal handler was in place. */
check_child_processes (pid_to_handler);
g_main_loop_run (main_loop);
g_source_remove (terminal_watch);
} else {
@ -1239,6 +1292,7 @@ int main(int argc, char *argv[])
errno = old_errno;
pexit("Failed to wait for `runtime %s`", opt_exec ? "exec" : "create");
}
}
if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) {
@ -1270,6 +1324,8 @@ int main(int argc, char *argv[])
container_pid = atoi(contents);
ninfo("container PID: %d", container_pid);
g_hash_table_insert (pid_to_handler, &container_pid, container_exit_cb);
/* Setup endpoint for attach */
_cleanup_free_ char *attach_symlink_dir_path = NULL;
if (!opt_exec) {
@ -1298,7 +1354,7 @@ int main(int argc, char *argv[])
g_timeout_add_seconds (opt_timeout, timeout_cb, NULL);
}
g_child_watch_add (container_pid, container_exit_cb, NULL);
check_child_processes(pid_to_handler);
g_main_loop_run (main_loop);