diff --git a/.gitignore b/.gitignore index be7505ca..981861ff 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ /ocic /ocid /ocid.conf +*.o *.orig /pause/pause /pause/pause.o diff --git a/conmon/cmsg.c b/conmon/cmsg.c new file mode 100644 index 00000000..c44db2ef --- /dev/null +++ b/conmon/cmsg.c @@ -0,0 +1,149 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.c. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cmsg.h" + +#define error(fmt, ...) \ + ({ \ + fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ + errno = ECOMM; \ + goto err; /* return value */ \ + }) + +/* + * Sends a file descriptor along the sockfd provided. Returns the return + * value of sendmsg(2). Any synchronisation and preparation of state + * should be done external to this (we expect the other side to be in + * recvfd() in the code). + */ +ssize_t sendfd(int sockfd, struct file_t file) +{ + struct msghdr msg = {0}; + struct iovec iov[1] = {0}; + struct cmsghdr *cmsg; + int *fdptr; + + union { + char buf[CMSG_SPACE(sizeof(file.fd))]; + struct cmsghdr align; + } u; + + /* + * We need to send some other data along with the ancillary data, + * otherwise the other side won't recieve any data. This is very + * well-hidden in the documentation (and only applies to + * SOCK_STREAM). See the bottom part of unix(7). + */ + iov[0].iov_base = file.name; + iov[0].iov_len = strlen(file.name) + 1; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + fdptr = (int *) CMSG_DATA(cmsg); + memcpy(fdptr, &file.fd, sizeof(int)); + + return sendmsg(sockfd, &msg, 0); +} + +/* + * Receives a file descriptor from the sockfd provided. Returns the file + * descriptor as sent from sendfd(). It will return the file descriptor + * or die (literally) trying. Any synchronisation and preparation of + * state should be done external to this (we expect the other side to be + * in sendfd() in the code). + */ +struct file_t recvfd(int sockfd) +{ + struct msghdr msg = {0}; + struct iovec iov[1] = {0}; + struct cmsghdr *cmsg; + struct file_t file = {0}; + int *fdptr; + int olderrno; + + union { + char buf[CMSG_SPACE(sizeof(file.fd))]; + struct cmsghdr align; + } u; + + /* Allocate a buffer. */ + /* TODO: Make this dynamic with MSG_PEEK. */ + file.name = malloc(TAG_BUFFER); + if (!file.name) + error("recvfd: failed to allocate file.tag buffer\n"); + + /* + * We need to "recieve" the non-ancillary data even though we don't + * plan to use it at all. Otherwise, things won't work as expected. + * See unix(7) and other well-hidden documentation. + */ + iov[0].iov_base = file.name; + iov[0].iov_len = TAG_BUFFER; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + ssize_t ret = recvmsg(sockfd, &msg, 0); + if (ret < 0) + goto err; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg) + error("recvfd: got NULL from CMSG_FIRSTHDR"); + if (cmsg->cmsg_level != SOL_SOCKET) + error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level); + if (cmsg->cmsg_type != SCM_RIGHTS) + error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type); + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len); + + fdptr = (int *) CMSG_DATA(cmsg); + if (!fdptr || *fdptr < 0) + error("recvfd: recieved invalid pointer"); + + file.fd = *fdptr; + return file; + +err: + olderrno = errno; + free(file.name); + errno = olderrno; + return (struct file_t){0}; +} diff --git a/conmon/cmsg.h b/conmon/cmsg.h new file mode 100644 index 00000000..7c7aefe6 --- /dev/null +++ b/conmon/cmsg.h @@ -0,0 +1,38 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.h. */ + +#pragma once + +#if !defined(CMSG_H) +#define CMSG_H + +#include + +/* TODO: Implement this properly with MSG_PEEK. */ +#define TAG_BUFFER 4096 + +/* This mirrors Go's (*os.File). */ +struct file_t { + char *name; + int fd; +}; + +struct file_t recvfd(int sockfd); +ssize_t sendfd(int sockfd, struct file_t file); + +#endif /* !defined(CMSG_H) */ diff --git a/conmon/conmon.c b/conmon/conmon.c index 279d7a15..87ca14ab 100644 --- a/conmon/conmon.c +++ b/conmon/conmon.c @@ -2,36 +2,46 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include #include -#include #include #include +#include "cmsg.h" + #define pexit(fmt, ...) \ do { \ - fprintf(stderr, "conmon: " fmt " %m\n", ##__VA_ARGS__); \ + fprintf(stderr, "[conmon:e]: " fmt " %m\n", ##__VA_ARGS__); \ syslog(LOG_ERR, "conmon : " fmt ": %m\n", ##__VA_ARGS__); \ exit(EXIT_FAILURE); \ } while (0) #define nexit(fmt, ...) \ do { \ - fprintf(stderr, "conmon: " fmt "\n", ##__VA_ARGS__); \ + fprintf(stderr, "[conmon:e]: " fmt "\n", ##__VA_ARGS__); \ syslog(LOG_ERR, "conmon : " fmt " \n", ##__VA_ARGS__); \ exit(EXIT_FAILURE); \ } while (0) #define nwarn(fmt, ...) \ do { \ - fprintf(stderr, "conmon: " fmt "\n", ##__VA_ARGS__); \ + fprintf(stderr, "[conmon:w]: " fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define ninfo(fmt, ...) \ + do { \ + fprintf(stderr, "[conmon:i]: " fmt "\n", ##__VA_ARGS__); \ } while (0) #define _cleanup_(x) __attribute__((cleanup(x))) @@ -58,14 +68,6 @@ static inline void gstring_free_cleanup(GString **string) #define _cleanup_close_ _cleanup_(closep) #define _cleanup_gstring_ _cleanup_(gstring_free_cleanup) -struct termios tty_orig; - -static void tty_restore(void) -{ - if (tcsetattr(STDIN_FILENO, TCSANOW, &tty_orig) == -1) - pexit("tcsetattr"); -} - #define BUF_SIZE 256 #define CMD_SIZE 1024 #define MAX_EVENTS 10 @@ -77,6 +79,7 @@ static char *bundle_path = NULL; static char *pid_file = NULL; static bool systemd_cgroup = false; static bool exec = false; +static char *log_path = NULL; static GOptionEntry entries[] = { { "terminal", 't', 0, G_OPTION_ARG_NONE, &terminal, "Terminal", NULL }, @@ -86,6 +89,7 @@ static GOptionEntry entries[] = { "pidfile", 'p', 0, G_OPTION_ARG_STRING, &pid_file, "PID file", NULL }, { "systemd-cgroup", 's', 0, G_OPTION_ARG_NONE, &systemd_cgroup, "Enable systemd cgroup manager", NULL }, { "exec", 'e', 0, G_OPTION_ARG_NONE, &exec, "Exec a command in a running container", NULL }, + { "log-path", 'l', 0, G_OPTION_ARG_STRING, &log_path, "Log file path", NULL }, { NULL } }; @@ -98,13 +102,15 @@ int main(int argc, char *argv[]) _cleanup_free_ char *contents; int cpid = -1; int status; - pid_t pid; + pid_t pid, create_pid; + _cleanup_close_ int logfd = -1; _cleanup_close_ int mfd = -1; _cleanup_close_ int epfd = -1; - char slname[BUF_SIZE]; + _cleanup_close_ int csfd = -1; + int runtime_mfd = -1; + char csname[PATH_MAX] = "/tmp/conmon-term.XXXXXXXX"; char buf[BUF_SIZE]; int num_read; - struct termios t; struct epoll_event ev; struct epoll_event evlist[MAX_EVENTS]; int sync_pipe_fd = -1; @@ -115,11 +121,11 @@ int main(int argc, char *argv[]) _cleanup_gstring_ GString *cmd = NULL; /* Command line parameters */ - context = g_option_context_new ("- conmon utility"); - g_option_context_add_main_entries (context, entries, "conmon"); - if (!g_option_context_parse (context, &argc, &argv, &error)) { - g_print ("option parsing failed: %s\n", error->message); - exit (1); + context = g_option_context_new("- conmon utility"); + g_option_context_add_main_entries(context, entries, "conmon"); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("option parsing failed: %s\n", error->message); + exit(1); } if (cid == NULL) @@ -132,7 +138,6 @@ int main(int argc, char *argv[]) if (getcwd(cwd, sizeof(cwd)) == NULL) { nexit("Failed to get working directory"); } - bundle_path = cwd; } @@ -141,10 +146,12 @@ int main(int argc, char *argv[]) "%s/pidfile-%s", cwd, cid) < 0) { nexit("Failed to generate the pidfile path"); } - pid_file = default_pid_file; } + if (log_path == NULL) + nexit("Log file path not provided. Use --log-path"); + /* Environment variables */ sync_pipe = getenv("_OCI_SYNCPIPE"); if (sync_pipe) { @@ -154,6 +161,11 @@ int main(int argc, char *argv[]) pexit("unable to parse _OCI_SYNCPIPE"); } + /* Open the log path file. */ + logfd = open(log_path, O_WRONLY | O_APPEND | O_CREAT); + if (logfd < 0) + pexit("Failed to open log file"); + /* * Set self as subreaper so we can wait for container process * and return its exit code. @@ -164,163 +176,247 @@ int main(int argc, char *argv[]) } if (terminal) { - /* Open the master pty */ - mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY); - if (mfd < 0) - pexit("Failed to open console master pty"); + struct sockaddr_un addr = {0}; - /* Grant access to the slave pty */ - if (grantpt(mfd) == -1) - pexit("Failed to grant access to slave pty"); + /* + * Generate a temporary name. Is this unsafe? Probably, but we can + * replace it with a rename(2) setup if necessary. + */ - /* Unlock the slave pty */ - if (unlockpt(mfd) == -1) { /* Unlock slave pty */ - pexit("Failed to unlock the slave pty"); - } + int unusedfd = g_mkstemp(csname); + if (unusedfd < 0) + pexit("Failed to generate random path for console-socket"); + close(unusedfd); - /* Get the slave pty name */ - ret = ptsname_r(mfd, slname, BUF_SIZE); - if (ret != 0) { - pexit("Failed to get the slave pty name"); - } + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, csname, sizeof(addr.sun_path)-1); + ninfo("addr{sun_family=AF_UNIX, sun_path=%s}", addr.sun_path); + + /* Bind to the console socket path. */ + csfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (csfd < 0) + pexit("Failed to create console-socket"); + /* XXX: This should be handled with a rename(2). */ + if (unlink(csname) < 0) + pexit("Failed to unlink temporary ranom path"); + if (bind(csfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) + pexit("Failed to bind to console-socket"); + if (listen(csfd, 128) < 0) + pexit("Failed to listen on console-socket"); + } else { + int fds[2]; + + /* + * Create a "fake" master fd so that we can use the same epoll code in + * both cases. The runtime_mfd will be closed after we dup over + * everything. + * + * TODO: Maybe this should be done with pipe(2)s? + */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fds) < 0) + pexit("Failed to create runtime_mfd socketpair"); + + mfd = fds[0]; + runtime_mfd = fds[1]; } cmd = g_string_new(runtime_path); - if (!exec) { - /* Create the container */ - if (systemd_cgroup) { - g_string_append_printf(cmd, " --systemd-cgroup"); - } - g_string_append_printf(cmd, " create %s --bundle %s --pid-file %s", - cid, bundle_path, pid_file); - if (terminal) { - g_string_append_printf(cmd, " --console %s", slname); - } - } else { + + /* Generate the cmdline. */ + if (exec && systemd_cgroup) + g_string_append_printf(cmd, " --systemd-cgroup"); + + if (exec) + g_string_append_printf(cmd, " exec -d --pid-file %s", pid_file); + else + g_string_append_printf(cmd, " create --bundle %s --pid-file %s", bundle_path, pid_file); + + if (terminal) + g_string_append_printf(cmd, " --console-socket %s", csname); + + /* Container name comes last. */ + g_string_append_printf(cmd, " %s", cid); + + /* Set the exec arguments. */ + if (exec) { + /* + * FIXME: This code is broken if argv[1] contains spaces or other + * similar characters that shells don't like. It's a bit silly + * that we're doing things inside a shell at all -- this should + * all be done in arrays. + */ + int i; - - /* Exec the command */ - if (terminal) { - g_string_append_printf(cmd, " exec -d --pid-file %s --console %s %s", - pid_file, slname, cid); - } else { - g_string_append_printf(cmd, " exec -d --pid-file %s %s", - pid_file, cid); - } - - for (i = 1; i < argc; i++) { + for (i = 1; i < argc; i++) g_string_append_printf(cmd, " %s", argv[i]); - } } - runtime_status = system(cmd->str); - if (runtime_status != 0) { - nexit("Failed to create container"); + /* + * We have to fork here because the current runC API dups the stdio of the + * calling process over the container's fds. This is actually *very bad* + * but is currently being discussed for change in + * https://github.com/opencontainers/runtime-spec/pull/513. Hopefully this + * won't be the case for very long. + */ + + /* Create our container. */ + create_pid = fork(); + if (create_pid < 0) { + pexit("Failed to fork the create command"); + } else if (!create_pid) { + char *argv[] = {"sh", "-c", cmd->str, NULL}; + + /* We only need to touch the stdio if we have terminal=false. */ + /* FIXME: This results in us not outputting runc error messages to ocid's log. */ + if (runtime_mfd >= 0) { + if (dup2(runtime_mfd, STDIN_FILENO) < 0) + pexit("Failed to dup over stdin"); + if (dup2(runtime_mfd, STDOUT_FILENO) < 0) + pexit("Failed to dup over stdout"); + if (dup2(runtime_mfd, STDERR_FILENO) < 0) + pexit("Failed to dup over stderr"); + } + + /* Exec into the process. TODO: Don't use the shell. */ + execv("/bin/sh", argv); + exit(127); } + /* The runtime has that fd now. We don't need to touch it anymore. */ + close(runtime_mfd); + + /* Get the console fd. */ + /* + * FIXME: If runc fails to start a container, we won't bail because we're + * busy waiting for requests. The solution probably involves + * epoll(2) and a signalfd(2). This causes a lot of issues. + */ + if (terminal) { + struct file_t console; + int connfd = -1; + + ninfo("about to accept from csfd: %d", csfd); + connfd = accept4(csfd, NULL, NULL, SOCK_CLOEXEC); + if (connfd < 0) + pexit("Failed to accept console-socket connection"); + + /* Not accepting anything else. */ + close(csfd); + unlink(csname); + + /* We exit if this fails. */ + ninfo("about to recvfd from connfd: %d", connfd); + console = recvfd(connfd); + + ninfo("console = {.name = '%s'; .fd = %d}", console.name, console.fd); + free(console.name); + + mfd = console.fd; + + /* Clean up everything */ + close(connfd); + } + + ninfo("about to waitpid: %d", create_pid); + + /* Wait for our create child to exit with the return code. */ + if (waitpid(create_pid, &runtime_status, 0) < 0) { + int old_errno = errno; + kill(create_pid, SIGKILL); + errno = old_errno; + pexit("Failed to wait for `runtime %s`", exec ? "exec" : "create"); + } + if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) + nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status)); + /* Read the pid so we can wait for the process to exit */ g_file_get_contents(pid_file, &contents, NULL, &err); if (err) { - fprintf(stderr, "Failed to read pidfile: %s\n", err->message); + nwarn("Failed to read pidfile: %s", err->message); g_error_free(err); exit(1); } cpid = atoi(contents); - printf("container PID: %d\n", cpid); + ninfo("container PID: %d", cpid); /* Send the container pid back to parent */ - if (sync_pipe_fd > 0 && !exec) { + if (sync_pipe_fd > 0 && !exec) { len = snprintf(buf, BUF_SIZE, "{\"pid\": %d}\n", cpid); if (len < 0 || write(sync_pipe_fd, buf, len) != len) { pexit("unable to send container pid to parent"); } } - if (terminal) { - /* Save exiting termios settings */ - if (tcgetattr(STDIN_FILENO, &tty_orig) == -1) - pexit("tcegetattr"); - - /* Settings for raw mode */ - t.c_lflag &= - ~(ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL | IEXTEN); - t.c_iflag &= - ~(BRKINT | ICRNL | IGNBRK | IGNCR | INLCR | INPCK | ISTRIP | - IXON | IXOFF | IGNPAR | PARMRK); - t.c_oflag &= ~OPOST; - t.c_cc[VMIN] = 1; - t.c_cc[VTIME] = 0; - - /* Set terminal to raw mode */ - if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &t) == -1) - pexit("tcsetattr"); - - /* Setup terminal restore on exit */ - if (atexit(tty_restore) != 0) - pexit("atexit"); - - epfd = epoll_create(5); - if (epfd < 0) - pexit("epoll_create"); - ev.events = EPOLLIN; - ev.data.fd = STDIN_FILENO; - if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) { - pexit("Failed to add stdin to epoll"); - } - ev.data.fd = mfd; - if (epoll_ctl(epfd, EPOLL_CTL_ADD, mfd, &ev) < 0) { - pexit("Failed to add console master fd to epoll"); - } - - /* Copy data back and forth between STDIN and master fd */ - while (true) { - int ready = epoll_wait(epfd, evlist, MAX_EVENTS, -1); - int i = 0; - for (i = 0; i < ready; i++) { - if (evlist[i].events & EPOLLIN) { - if (evlist[i].data.fd == STDIN_FILENO) { - num_read = - read(STDIN_FILENO, buf, - BUF_SIZE); - if (num_read <= 0) - goto out; - - if (write(mfd, buf, num_read) != - num_read) { - nwarn - ("partial/failed write (masterFd)"); - goto out; - } - } else if (evlist[i].data.fd == mfd) { - num_read = - read(mfd, buf, BUF_SIZE); - if (num_read <= 0) - goto out; - - if (write - (STDOUT_FILENO, buf, - num_read) != num_read) { - nwarn - ("partial/failed write (STDOUT_FILENO)"); - goto out; - } - } - } else if (evlist[i].events & - (EPOLLHUP | EPOLLERR)) { - printf("closing fd %d\n", - evlist[i].data.fd); - if (close(evlist[i].data.fd) < 0) - pexit("close"); - goto out; - } - } - } - out: - tty_restore(); + /* Create epoll_ctl so that we can handle read/write events. */ + /* + * TODO: Switch to libuv so that we can also implement exec as well as + * attach and other important things. Using epoll directly is just + * really nasty. + */ + epfd = epoll_create(5); + if (epfd < 0) + pexit("epoll_create"); + ev.events = EPOLLIN; + /* + ev.data.fd = STDIN_FILENO; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) { + pexit("Failed to add stdin to epoll"); + } + */ + ev.data.fd = mfd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, mfd, &ev) < 0) { + pexit("Failed to add console master fd to epoll"); } + /* + * Log all of the container's output and pipe STDIN into it. Currently + * nothing using the STDIN setup (which makes its inclusion here a bit + * questionable but we need to rewrite this code soon anyway TODO). + */ + while (true) { + int ready = epoll_wait(epfd, evlist, MAX_EVENTS, -1); + int i = 0; + for (i = 0; i < ready; i++) { + if (evlist[i].events & EPOLLIN) { + if (evlist[i].data.fd == STDIN_FILENO) { + /* + * TODO: We need to replace STDIN_FILENO with something + * more sophisticated so that attach actually works + * properly. + */ + num_read = read(STDIN_FILENO, buf, BUF_SIZE); + if (num_read <= 0) + goto out; + + if (write(mfd, buf, num_read) != num_read) { + nwarn("partial/failed write (masterFd)"); + goto out; + } + } else if (evlist[i].data.fd == mfd) { + num_read = read(mfd, buf, BUF_SIZE); + if (num_read <= 0) + goto out; + + ninfo("read a chunk: (fd=%d) '%s'", mfd, buf); + + /* Log all output to logfd. */ + if (write(logfd, buf, num_read) != num_read) { + nwarn("partial/failed write (logFd)"); + goto out; + } + } + } else if (evlist[i].events & (EPOLLHUP | EPOLLERR)) { + printf("closing fd %d\n", evlist[i].data.fd); + if (close(evlist[i].data.fd) < 0) + pexit("close"); + goto out; + } + } + } + +out: /* Wait for the container process and record its exit code */ while ((pid = waitpid(-1, &status, 0)) > 0) { int exit_status = WEXITSTATUS(status); diff --git a/oci/oci.go b/oci/oci.go index ff19d59e..a00a819e 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -120,9 +120,13 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { args = append(args, "-r", r.Path(c)) args = append(args, "-b", c.bundlePath) args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile")) + args = append(args, "-l", c.logPath) if c.terminal { args = append(args, "-t") } + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) cmd := exec.Command(r.conmonPath, args...) cmd.Dir = c.bundlePath @@ -248,6 +252,19 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp } }() + logFile, err := ioutil.TempFile("", "ocid-log-"+c.name) + if err != nil { + return nil, ExecSyncError{ + ExitCode: -1, + Err: err, + } + } + logPath := logFile.Name() + defer func() { + logFile.Close() + os.RemoveAll(logPath) + }() + var args []string args = append(args, "-c", c.name) args = append(args, "-r", r.Path(c)) @@ -256,6 +273,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp if c.terminal { args = append(args, "-t") } + args = append(args, "-l", logPath) args = append(args, command...) @@ -371,9 +389,25 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp } } + // The actual logged output is not the same as stdoutBuf and stderrBuf, + // which are used for getting error information. For the actual + // ExecSyncResponse we have to read the logfile. + // XXX: Currently runC dups the same console over both stdout and stderr, + // so we can't differentiate between the two. + + outputBytes, err := ioutil.ReadFile(logPath) + if err != nil { + return nil, ExecSyncError{ + Stdout: stdoutBuf, + Stderr: stderrBuf, + ExitCode: -1, + Err: err, + } + } + return &ExecSyncResponse{ - Stdout: stdoutBuf.Bytes(), - Stderr: stderrBuf.Bytes(), + Stdout: outputBytes, + Stderr: nil, ExitCode: 0, }, nil } diff --git a/server/container_create.go b/server/container_create.go index 78d4fc92..cad91100 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -332,6 +332,22 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } logPath := containerConfig.LogPath + if logPath == "" { + // TODO: Should we use sandboxConfig.GetLogDirectory() here? + logPath = filepath.Join(sb.logDir, containerID+".log") + } + if !filepath.IsAbs(logPath) { + // XXX: It's not really clear what this should be versus the sbox logDirectory. + logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath) + logPath = filepath.Join(sb.logDir, logPath) + } + + logrus.WithFields(logrus.Fields{ + "sbox.logdir": sb.logDir, + "ctr.logfile": containerConfig.LogPath, + "log_path": logPath, + }).Debugf("setting container's log_path") + specgen.SetProcessTerminal(containerConfig.Tty) linux := containerConfig.GetLinux() diff --git a/server/sandbox.go b/server/sandbox.go index 747c4dad..1832265c 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -121,7 +121,6 @@ func hostNetNsPath() (string, error) { } defer netNS.Close() - return netNS.Path(), nil } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 410365f5..c9169fe4 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -149,12 +149,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.SetHostname(hostname) } - // set log directory - logDir := req.GetConfig().LogDirectory - if logDir == "" { - logDir = filepath.Join(s.config.LogDir, id) - } - // set DNS options if req.GetConfig().GetDnsConfig() != nil { dnsServers := req.GetConfig().GetDnsConfig().Servers @@ -194,6 +188,19 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, err } + // set log directory + logDir := req.GetConfig().LogDirectory + if logDir == "" { + logDir = filepath.Join(s.config.LogDir, id) + } + if err = os.MkdirAll(logDir, 0700); err != nil { + return nil, err + } + // This should always be absolute from k8s. + if !filepath.IsAbs(logDir) { + return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir) + } + // Don't use SELinux separation with Host Pid or IPC Namespace, if !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostPid && !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostIpc { processLabel, mountLabel, err = getSELinuxLabels(nil) @@ -245,12 +252,14 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } }() - privileged := s.privilegedSandbox(req) + // set log path inside log directory + logPath := filepath.Join(logDir, id+".log") + privileged := s.privilegedSandbox(req) g.AddAnnotation("ocid/metadata", string(metadataJSON)) g.AddAnnotation("ocid/labels", string(labelsJSON)) g.AddAnnotation("ocid/annotations", string(annotationsJSON)) - g.AddAnnotation("ocid/log_path", logDir) + g.AddAnnotation("ocid/log_path", logPath) g.AddAnnotation("ocid/name", name) g.AddAnnotation("ocid/container_type", containerTypeSandbox) g.AddAnnotation("ocid/sandbox_id", id) @@ -379,7 +388,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index 6e67e476..dbf9a362 100644 --- a/server/server.go +++ b/server/server.go @@ -5,6 +5,7 @@ import ( "fmt" "io/ioutil" "os" + "path/filepath" "sync" "github.com/Sirupsen/logrus" @@ -175,7 +176,7 @@ func (s *Server) loadSandbox(id string) error { sb := &sandbox{ id: id, name: name, - logDir: m.Annotations["ocid/log_path"], + logDir: filepath.Dir(m.Annotations["ocid/log_path"]), labels: labels, containers: oci.NewMemoryStore(), processLabel: processLabel, @@ -225,7 +226,7 @@ func (s *Server) loadSandbox(id string) error { } }() - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false, privileged) + scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged) if err != nil { return err }