merge branch 'pr-162'

Closes #162
LGTMs: @sameo @runcom @mrunalp @cyphar
This commit is contained in:
Aleksa Sarai 2017-04-05 04:50:27 +10:00
commit 7f660a2060
No known key found for this signature in database
GPG key ID: 9E18AA267DDB8DB4
24 changed files with 795 additions and 179 deletions

1
.gitignore vendored
View file

@ -7,6 +7,7 @@
/ocic
/ocid
/ocid.conf
*.o
*.orig
/pause/pause
/pause/pause.o

View file

@ -6,7 +6,8 @@ set -o pipefail
for d in $(find . -type d -not -iwholename '*.git*' -a -not -iname '.tool' -a -not -iwholename '*vendor*'); do
${GOPATH}/bin/gometalinter \
--exclude='error return value not checked.*(Close|Log|Print).*\(errcheck\)$' \
--exclude='error return value not checked.*(Close|Log|Print|RemoveAll).*\(errcheck\)$' \
--exclude='declaration of.*err.*shadows declaration.*\(vetshadow\)$' \
--exclude='.*_test\.go:.*error return value not checked.*\(errcheck\)$' \
--exclude='duplicate of.*_test.go.*\(dupl\)$' \
--exclude='cmd\/client\/.*\.go.*\(dupl\)$' \
@ -15,7 +16,7 @@ for d in $(find . -type d -not -iwholename '*.git*' -a -not -iname '.tool' -a -n
--disable=aligncheck \
--disable=gotype \
--disable=gas \
--cyclo-over=60 \
--cyclo-over=80 \
--dupl-threshold=100 \
--tests \
--deadline=60s "${d}"

View file

@ -43,7 +43,9 @@ RUN mkdir -p /usr/src/criu \
&& rm -rf /usr/src/criu
# Install runc
ENV RUNC_COMMIT cc29e3dded8e27ba8f65738f40d251c885030a28
# TODO: This should actually be v1.0.0-rc3 but we first need to switch to
# v1.0.0-rc5 runtime config generation.
ENV RUNC_COMMIT 31980a53ae7887b2c8f8715d13c3eb486c27b6cf
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \

149
conmon/cmsg.c Normal file
View file

@ -0,0 +1,149 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.c. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include "cmsg.h"
#define error(fmt, ...) \
({ \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
errno = ECOMM; \
goto err; /* return value */ \
})
/*
* Sends a file descriptor along the sockfd provided. Returns the return
* value of sendmsg(2). Any synchronisation and preparation of state
* should be done external to this (we expect the other side to be in
* recvfd() in the code).
*/
ssize_t sendfd(int sockfd, struct file_t file)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
int *fdptr;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/*
* We need to send some other data along with the ancillary data,
* otherwise the other side won't recieve any data. This is very
* well-hidden in the documentation (and only applies to
* SOCK_STREAM). See the bottom part of unix(7).
*/
iov[0].iov_base = file.name;
iov[0].iov_len = strlen(file.name) + 1;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
fdptr = (int *) CMSG_DATA(cmsg);
memcpy(fdptr, &file.fd, sizeof(int));
return sendmsg(sockfd, &msg, 0);
}
/*
* Receives a file descriptor from the sockfd provided. Returns the file
* descriptor as sent from sendfd(). It will return the file descriptor
* or die (literally) trying. Any synchronisation and preparation of
* state should be done external to this (we expect the other side to be
* in sendfd() in the code).
*/
struct file_t recvfd(int sockfd)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
struct file_t file = {0};
int *fdptr;
int olderrno;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/* Allocate a buffer. */
/* TODO: Make this dynamic with MSG_PEEK. */
file.name = malloc(TAG_BUFFER);
if (!file.name)
error("recvfd: failed to allocate file.tag buffer\n");
/*
* We need to "recieve" the non-ancillary data even though we don't
* plan to use it at all. Otherwise, things won't work as expected.
* See unix(7) and other well-hidden documentation.
*/
iov[0].iov_base = file.name;
iov[0].iov_len = TAG_BUFFER;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
ssize_t ret = recvmsg(sockfd, &msg, 0);
if (ret < 0)
goto err;
cmsg = CMSG_FIRSTHDR(&msg);
if (!cmsg)
error("recvfd: got NULL from CMSG_FIRSTHDR");
if (cmsg->cmsg_level != SOL_SOCKET)
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
if (cmsg->cmsg_type != SCM_RIGHTS)
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
fdptr = (int *) CMSG_DATA(cmsg);
if (!fdptr || *fdptr < 0)
error("recvfd: recieved invalid pointer");
file.fd = *fdptr;
return file;
err:
olderrno = errno;
free(file.name);
errno = olderrno;
return (struct file_t){0};
}

38
conmon/cmsg.h Normal file
View file

@ -0,0 +1,38 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.h. */
#pragma once
#if !defined(CMSG_H)
#define CMSG_H
#include <sys/types.h>
/* TODO: Implement this properly with MSG_PEEK. */
#define TAG_BUFFER 4096
/* This mirrors Go's (*os.File). */
struct file_t {
char *name;
int fd;
};
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
#endif /* !defined(CMSG_H) */

View file

@ -2,36 +2,46 @@
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <syslog.h>
#include <termios.h>
#include <unistd.h>
#include <glib.h>
#include "cmsg.h"
#define pexit(fmt, ...) \
do { \
fprintf(stderr, "conmon: " fmt " %m\n", ##__VA_ARGS__); \
fprintf(stderr, "[conmon:e]: " fmt " %m\n", ##__VA_ARGS__); \
syslog(LOG_ERR, "conmon <error>: " fmt ": %m\n", ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
} while (0)
#define nexit(fmt, ...) \
do { \
fprintf(stderr, "conmon: " fmt "\n", ##__VA_ARGS__); \
fprintf(stderr, "[conmon:e]: " fmt "\n", ##__VA_ARGS__); \
syslog(LOG_ERR, "conmon <error>: " fmt " \n", ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
} while (0)
#define nwarn(fmt, ...) \
do { \
fprintf(stderr, "conmon: " fmt "\n", ##__VA_ARGS__); \
fprintf(stderr, "[conmon:w]: " fmt "\n", ##__VA_ARGS__); \
} while (0)
#define ninfo(fmt, ...) \
do { \
fprintf(stderr, "[conmon:i]: " fmt "\n", ##__VA_ARGS__); \
} while (0)
#define _cleanup_(x) __attribute__((cleanup(x)))
@ -58,14 +68,6 @@ static inline void gstring_free_cleanup(GString **string)
#define _cleanup_close_ _cleanup_(closep)
#define _cleanup_gstring_ _cleanup_(gstring_free_cleanup)
struct termios tty_orig;
static void tty_restore(void)
{
if (tcsetattr(STDIN_FILENO, TCSANOW, &tty_orig) == -1)
pexit("tcsetattr");
}
#define BUF_SIZE 256
#define CMD_SIZE 1024
#define MAX_EVENTS 10
@ -77,6 +79,7 @@ static char *bundle_path = NULL;
static char *pid_file = NULL;
static bool systemd_cgroup = false;
static bool exec = false;
static char *log_path = NULL;
static GOptionEntry entries[] =
{
{ "terminal", 't', 0, G_OPTION_ARG_NONE, &terminal, "Terminal", NULL },
@ -86,9 +89,33 @@ static GOptionEntry entries[] =
{ "pidfile", 'p', 0, G_OPTION_ARG_STRING, &pid_file, "PID file", NULL },
{ "systemd-cgroup", 's', 0, G_OPTION_ARG_NONE, &systemd_cgroup, "Enable systemd cgroup manager", NULL },
{ "exec", 'e', 0, G_OPTION_ARG_NONE, &exec, "Exec a command in a running container", NULL },
{ "log-path", 'l', 0, G_OPTION_ARG_STRING, &log_path, "Log file path", NULL },
{ NULL }
};
int set_k8s_timestamp(char *buf, ssize_t buflen, const char *stream_type)
{
time_t now = time(NULL);
struct tm *tm;
char off_sign = '+';
int off;
if ((tm = localtime(&now)) == NULL) {
return -1;
}
off = (int) tm->tm_gmtoff;
if (tm->tm_gmtoff < 0) {
off_sign = '-';
off = -off;
}
snprintf(buf, buflen, "%d-%02d-%02dT%02d:%02d:%02d%c%02d:%02d %s ",
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec,
off_sign, off / 3600, off % 3600, stream_type);
return 0;
}
int main(int argc, char *argv[])
{
int ret, runtime_status;
@ -98,13 +125,15 @@ int main(int argc, char *argv[])
_cleanup_free_ char *contents;
int cpid = -1;
int status;
pid_t pid;
pid_t pid, create_pid;
_cleanup_close_ int logfd = -1;
_cleanup_close_ int mfd = -1;
_cleanup_close_ int epfd = -1;
char slname[BUF_SIZE];
_cleanup_close_ int csfd = -1;
int runtime_mfd = -1;
char csname[PATH_MAX] = "/tmp/conmon-term.XXXXXXXX";
char buf[BUF_SIZE];
int num_read;
struct termios t;
struct epoll_event ev;
struct epoll_event evlist[MAX_EVENTS];
int sync_pipe_fd = -1;
@ -115,11 +144,11 @@ int main(int argc, char *argv[])
_cleanup_gstring_ GString *cmd = NULL;
/* Command line parameters */
context = g_option_context_new ("- conmon utility");
g_option_context_add_main_entries (context, entries, "conmon");
if (!g_option_context_parse (context, &argc, &argv, &error)) {
g_print ("option parsing failed: %s\n", error->message);
exit (1);
context = g_option_context_new("- conmon utility");
g_option_context_add_main_entries(context, entries, "conmon");
if (!g_option_context_parse(context, &argc, &argv, &error)) {
g_print("option parsing failed: %s\n", error->message);
exit(1);
}
if (cid == NULL)
@ -132,7 +161,6 @@ int main(int argc, char *argv[])
if (getcwd(cwd, sizeof(cwd)) == NULL) {
nexit("Failed to get working directory");
}
bundle_path = cwd;
}
@ -141,10 +169,12 @@ int main(int argc, char *argv[])
"%s/pidfile-%s", cwd, cid) < 0) {
nexit("Failed to generate the pidfile path");
}
pid_file = default_pid_file;
}
if (log_path == NULL)
nexit("Log file path not provided. Use --log-path");
/* Environment variables */
sync_pipe = getenv("_OCI_SYNCPIPE");
if (sync_pipe) {
@ -154,6 +184,11 @@ int main(int argc, char *argv[])
pexit("unable to parse _OCI_SYNCPIPE");
}
/* Open the log path file. */
logfd = open(log_path, O_WRONLY | O_APPEND | O_CREAT);
if (logfd < 0)
pexit("Failed to open log file");
/*
* Set self as subreaper so we can wait for container process
* and return its exit code.
@ -164,163 +199,269 @@ int main(int argc, char *argv[])
}
if (terminal) {
/* Open the master pty */
mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
if (mfd < 0)
pexit("Failed to open console master pty");
struct sockaddr_un addr = {0};
/* Grant access to the slave pty */
if (grantpt(mfd) == -1)
pexit("Failed to grant access to slave pty");
/*
* Generate a temporary name. Is this unsafe? Probably, but we can
* replace it with a rename(2) setup if necessary.
*/
/* Unlock the slave pty */
if (unlockpt(mfd) == -1) { /* Unlock slave pty */
pexit("Failed to unlock the slave pty");
}
int unusedfd = g_mkstemp(csname);
if (unusedfd < 0)
pexit("Failed to generate random path for console-socket");
close(unusedfd);
/* Get the slave pty name */
ret = ptsname_r(mfd, slname, BUF_SIZE);
if (ret != 0) {
pexit("Failed to get the slave pty name");
}
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, csname, sizeof(addr.sun_path)-1);
ninfo("addr{sun_family=AF_UNIX, sun_path=%s}", addr.sun_path);
/* Bind to the console socket path. */
csfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (csfd < 0)
pexit("Failed to create console-socket");
/* XXX: This should be handled with a rename(2). */
if (unlink(csname) < 0)
pexit("Failed to unlink temporary ranom path");
if (bind(csfd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
pexit("Failed to bind to console-socket");
if (listen(csfd, 128) < 0)
pexit("Failed to listen on console-socket");
} else {
int fds[2];
/*
* Create a "fake" master fd so that we can use the same epoll code in
* both cases. The runtime_mfd will be closed after we dup over
* everything.
*
* We use pipes here because open(/dev/std{out,err}) will fail if we
* used anything else (and it wouldn't be a good idea to create a new
* pty pair in the host).
*/
if (pipe(fds) < 0)
pexit("Failed to create runtime_mfd pipes");
mfd = fds[0];
runtime_mfd = fds[1];
}
cmd = g_string_new(runtime_path);
if (!exec) {
/* Create the container */
if (systemd_cgroup) {
g_string_append_printf(cmd, " --systemd-cgroup");
}
g_string_append_printf(cmd, " create %s --bundle %s --pid-file %s",
cid, bundle_path, pid_file);
if (terminal) {
g_string_append_printf(cmd, " --console %s", slname);
}
} else {
/* Generate the cmdline. */
if (exec && systemd_cgroup)
g_string_append_printf(cmd, " --systemd-cgroup");
if (exec)
g_string_append_printf(cmd, " exec -d --pid-file %s", pid_file);
else
g_string_append_printf(cmd, " create --bundle %s --pid-file %s", bundle_path, pid_file);
if (terminal)
g_string_append_printf(cmd, " --console-socket %s", csname);
/* Container name comes last. */
g_string_append_printf(cmd, " %s", cid);
/* Set the exec arguments. */
if (exec) {
/*
* FIXME: This code is broken if argv[1] contains spaces or other
* similar characters that shells don't like. It's a bit silly
* that we're doing things inside a shell at all -- this should
* all be done in arrays.
*/
int i;
/* Exec the command */
if (terminal) {
g_string_append_printf(cmd, " exec -d --pid-file %s --console %s %s",
pid_file, slname, cid);
} else {
g_string_append_printf(cmd, " exec -d --pid-file %s %s",
pid_file, cid);
}
for (i = 1; i < argc; i++) {
for (i = 1; i < argc; i++)
g_string_append_printf(cmd, " %s", argv[i]);
}
}
runtime_status = system(cmd->str);
if (runtime_status != 0) {
nexit("Failed to create container");
/*
* We have to fork here because the current runC API dups the stdio of the
* calling process over the container's fds. This is actually *very bad*
* but is currently being discussed for change in
* https://github.com/opencontainers/runtime-spec/pull/513. Hopefully this
* won't be the case for very long.
*/
/* Create our container. */
create_pid = fork();
if (create_pid < 0) {
pexit("Failed to fork the create command");
} else if (!create_pid) {
char *argv[] = {"sh", "-c", cmd->str, NULL};
/* We only need to touch the stdio if we have terminal=false. */
/* FIXME: This results in us not outputting runc error messages to ocid's log. */
if (runtime_mfd >= 0) {
if (dup2(runtime_mfd, STDIN_FILENO) < 0)
pexit("Failed to dup over stdin");
if (dup2(runtime_mfd, STDOUT_FILENO) < 0)
pexit("Failed to dup over stdout");
if (dup2(runtime_mfd, STDERR_FILENO) < 0)
pexit("Failed to dup over stderr");
}
/* Exec into the process. TODO: Don't use the shell. */
execv("/bin/sh", argv);
exit(127);
}
/* The runtime has that fd now. We don't need to touch it anymore. */
close(runtime_mfd);
/* Get the console fd. */
/*
* FIXME: If runc fails to start a container, we won't bail because we're
* busy waiting for requests. The solution probably involves
* epoll(2) and a signalfd(2). This causes a lot of issues.
*/
if (terminal) {
struct file_t console;
int connfd = -1;
ninfo("about to accept from csfd: %d", csfd);
connfd = accept4(csfd, NULL, NULL, SOCK_CLOEXEC);
if (connfd < 0)
pexit("Failed to accept console-socket connection");
/* Not accepting anything else. */
close(csfd);
unlink(csname);
/* We exit if this fails. */
ninfo("about to recvfd from connfd: %d", connfd);
console = recvfd(connfd);
ninfo("console = {.name = '%s'; .fd = %d}", console.name, console.fd);
free(console.name);
mfd = console.fd;
/* Clean up everything */
close(connfd);
}
ninfo("about to waitpid: %d", create_pid);
/* Wait for our create child to exit with the return code. */
if (waitpid(create_pid, &runtime_status, 0) < 0) {
int old_errno = errno;
kill(create_pid, SIGKILL);
errno = old_errno;
pexit("Failed to wait for `runtime %s`", exec ? "exec" : "create");
}
if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0)
nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status));
/* Read the pid so we can wait for the process to exit */
g_file_get_contents(pid_file, &contents, NULL, &err);
if (err) {
fprintf(stderr, "Failed to read pidfile: %s\n", err->message);
nwarn("Failed to read pidfile: %s", err->message);
g_error_free(err);
exit(1);
}
cpid = atoi(contents);
printf("container PID: %d\n", cpid);
ninfo("container PID: %d", cpid);
/* Send the container pid back to parent */
if (sync_pipe_fd > 0 && !exec) {
if (sync_pipe_fd > 0 && !exec) {
len = snprintf(buf, BUF_SIZE, "{\"pid\": %d}\n", cpid);
if (len < 0 || write(sync_pipe_fd, buf, len) != len) {
pexit("unable to send container pid to parent");
}
}
if (terminal) {
/* Save exiting termios settings */
if (tcgetattr(STDIN_FILENO, &tty_orig) == -1)
pexit("tcegetattr");
/* Settings for raw mode */
t.c_lflag &=
~(ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL | IEXTEN);
t.c_iflag &=
~(BRKINT | ICRNL | IGNBRK | IGNCR | INLCR | INPCK | ISTRIP |
IXON | IXOFF | IGNPAR | PARMRK);
t.c_oflag &= ~OPOST;
t.c_cc[VMIN] = 1;
t.c_cc[VTIME] = 0;
/* Set terminal to raw mode */
if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &t) == -1)
pexit("tcsetattr");
/* Setup terminal restore on exit */
if (atexit(tty_restore) != 0)
pexit("atexit");
epfd = epoll_create(5);
if (epfd < 0)
pexit("epoll_create");
ev.events = EPOLLIN;
ev.data.fd = STDIN_FILENO;
if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
pexit("Failed to add stdin to epoll");
}
ev.data.fd = mfd;
if (epoll_ctl(epfd, EPOLL_CTL_ADD, mfd, &ev) < 0) {
pexit("Failed to add console master fd to epoll");
}
/* Copy data back and forth between STDIN and master fd */
while (true) {
int ready = epoll_wait(epfd, evlist, MAX_EVENTS, -1);
int i = 0;
for (i = 0; i < ready; i++) {
if (evlist[i].events & EPOLLIN) {
if (evlist[i].data.fd == STDIN_FILENO) {
num_read =
read(STDIN_FILENO, buf,
BUF_SIZE);
if (num_read <= 0)
goto out;
if (write(mfd, buf, num_read) !=
num_read) {
nwarn
("partial/failed write (masterFd)");
goto out;
}
} else if (evlist[i].data.fd == mfd) {
num_read =
read(mfd, buf, BUF_SIZE);
if (num_read <= 0)
goto out;
if (write
(STDOUT_FILENO, buf,
num_read) != num_read) {
nwarn
("partial/failed write (STDOUT_FILENO)");
goto out;
}
}
} else if (evlist[i].events &
(EPOLLHUP | EPOLLERR)) {
printf("closing fd %d\n",
evlist[i].data.fd);
if (close(evlist[i].data.fd) < 0)
pexit("close");
goto out;
}
}
}
out:
tty_restore();
/* Create epoll_ctl so that we can handle read/write events. */
/*
* TODO: Switch to libuv so that we can also implement exec as well as
* attach and other important things. Using epoll directly is just
* really nasty.
*/
epfd = epoll_create(5);
if (epfd < 0)
pexit("epoll_create");
ev.events = EPOLLIN;
/*
ev.data.fd = STDIN_FILENO;
if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
pexit("Failed to add stdin to epoll");
}
*/
ev.data.fd = mfd;
if (epoll_ctl(epfd, EPOLL_CTL_ADD, mfd, &ev) < 0) {
pexit("Failed to add console master fd to epoll");
}
#define TSBUFLEN 34
char tsbuf[TSBUFLEN];
/*
* Log all of the container's output and pipe STDIN into it. Currently
* nothing using the STDIN setup (which makes its inclusion here a bit
* questionable but we need to rewrite this code soon anyway TODO).
*/
while (true) {
int ready = epoll_wait(epfd, evlist, MAX_EVENTS, -1);
int i = 0;
for (i = 0; i < ready; i++) {
if (evlist[i].events & EPOLLIN) {
if (evlist[i].data.fd == STDIN_FILENO) {
/*
* TODO: We need to replace STDIN_FILENO with something
* more sophisticated so that attach actually works
* properly.
*/
num_read = read(STDIN_FILENO, buf, BUF_SIZE);
if (num_read <= 0)
goto out;
if (write(mfd, buf, num_read) != num_read) {
nwarn("partial/failed write (masterFd)");
goto out;
}
} else if (evlist[i].data.fd == mfd) {
num_read = read(mfd, buf, BUF_SIZE);
if (num_read <= 0)
goto out;
ninfo("read a chunk: (fd=%d) '%s'", mfd, buf);
/* Prepend the CRI-mandated timestamp and other metadata. */
/*
* FIXME: Currently this code makes the assumption that
* @buf doesn't contain any newlines (since the CRI
* requires each line to contain a timestamp). This
* is an /okay/ assumption in most cases because
* ptys generally have newline-buffered output.
*/
int rc = set_k8s_timestamp(tsbuf, TSBUFLEN, "stdout");
if (rc < 0) {
nwarn("failed to set timestamp");
} else {
if (write(logfd, tsbuf, TSBUFLEN) != TSBUFLEN) {
nwarn("partial/failed write ts (logFd)");
}
}
/* Log all output to logfd. */
if (write(logfd, buf, num_read) != num_read) {
nwarn("partial/failed write (logFd)");
goto out;
}
}
} else if (evlist[i].events & (EPOLLHUP | EPOLLERR)) {
printf("closing fd %d\n", evlist[i].data.fd);
if (close(evlist[i].data.fd) < 0)
pexit("close");
goto out;
}
}
}
out:
/* Wait for the container process and record its exit code */
while ((pid = waitpid(-1, &status, 0)) > 0) {
int exit_status = WEXITSTATUS(status);

View file

@ -120,9 +120,13 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error {
args = append(args, "-r", r.Path(c))
args = append(args, "-b", c.bundlePath)
args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile"))
args = append(args, "-l", c.logPath)
if c.terminal {
args = append(args, "-t")
}
logrus.WithFields(logrus.Fields{
"args": args,
}).Debugf("running conmon: %s", r.conmonPath)
cmd := exec.Command(r.conmonPath, args...)
cmd.Dir = c.bundlePath
@ -248,6 +252,19 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp
}
}()
logFile, err := ioutil.TempFile("", "ocid-log-"+c.name)
if err != nil {
return nil, ExecSyncError{
ExitCode: -1,
Err: err,
}
}
logPath := logFile.Name()
defer func() {
logFile.Close()
os.RemoveAll(logPath)
}()
var args []string
args = append(args, "-c", c.name)
args = append(args, "-r", r.Path(c))
@ -256,6 +273,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp
if c.terminal {
args = append(args, "-t")
}
args = append(args, "-l", logPath)
args = append(args, command...)
@ -371,9 +389,25 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp
}
}
// The actual logged output is not the same as stdoutBuf and stderrBuf,
// which are used for getting error information. For the actual
// ExecSyncResponse we have to read the logfile.
// XXX: Currently runC dups the same console over both stdout and stderr,
// so we can't differentiate between the two.
outputBytes, err := ioutil.ReadFile(logPath)
if err != nil {
return nil, ExecSyncError{
Stdout: stdoutBuf,
Stderr: stderrBuf,
ExitCode: -1,
Err: err,
}
}
return &ExecSyncResponse{
Stdout: stdoutBuf.Bytes(),
Stderr: stderrBuf.Bytes(),
Stdout: outputBytes,
Stderr: nil,
ExitCode: 0,
}, nil
}

View file

@ -56,8 +56,6 @@ type RootConfig struct {
// LogDir is the default log directory were all logs will go unless kubelet
// tells us to put them somewhere else.
//
// TODO: This is currently unused until the conmon logging rewrite is done.
LogDir string `toml:"log_dir"`
}

View file

@ -203,6 +203,29 @@ func setupContainerUser(specgen *generate.Generator, rootfs string, sc *pb.Linux
return nil
}
// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
// logPath to be a broken symlink to some magical Docker path. Ideally we
// wouldn't have to deal with this, but until that issue is fixed we have to
// remove the path if it's a broken symlink.
func ensureSaneLogPath(logPath string) error {
// If the path exists but the resolved path does not, then we have a broken
// symlink and we need to remove it.
fi, err := os.Lstat(logPath)
if err != nil || fi.Mode()&os.ModeSymlink == 0 {
// Non-existant files and non-symlinks aren't our problem.
return nil
}
_, err = os.Stat(logPath)
if os.IsNotExist(err) {
err = os.RemoveAll(logPath)
if err != nil {
return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", err)
}
}
return nil
}
// CreateContainer creates a new container in specified PodSandbox
func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
logrus.Debugf("CreateContainerRequest %+v", req)
@ -332,6 +355,27 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
}
logPath := containerConfig.LogPath
if logPath == "" {
// TODO: Should we use sandboxConfig.GetLogDirectory() here?
logPath = filepath.Join(sb.logDir, containerID+".log")
}
if !filepath.IsAbs(logPath) {
// XXX: It's not really clear what this should be versus the sbox logDirectory.
logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath)
logPath = filepath.Join(sb.logDir, logPath)
}
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath(logPath); err != nil {
return nil, err
}
logrus.WithFields(logrus.Fields{
"sbox.logdir": sb.logDir,
"ctr.logfile": containerConfig.LogPath,
"log_path": logPath,
}).Debugf("setting container's log_path")
specgen.SetProcessTerminal(containerConfig.Tty)
linux := containerConfig.GetLinux()

View file

@ -121,7 +121,6 @@ func hostNetNsPath() (string, error) {
}
defer netNS.Close()
return netNS.Path(), nil
}

View file

@ -149,12 +149,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
g.SetHostname(hostname)
}
// set log directory
logDir := req.GetConfig().LogDirectory
if logDir == "" {
logDir = filepath.Join(s.config.LogDir, id)
}
// set DNS options
if req.GetConfig().GetDnsConfig() != nil {
dnsServers := req.GetConfig().GetDnsConfig().Servers
@ -194,6 +188,19 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err
}
// set log directory
logDir := req.GetConfig().LogDirectory
if logDir == "" {
logDir = filepath.Join(s.config.LogDir, id)
}
if err = os.MkdirAll(logDir, 0700); err != nil {
return nil, err
}
// This should always be absolute from k8s.
if !filepath.IsAbs(logDir) {
return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir)
}
// Don't use SELinux separation with Host Pid or IPC Namespace,
if !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostPid && !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostIpc {
processLabel, mountLabel, err = getSELinuxLabels(nil)
@ -245,12 +252,19 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
}
}()
privileged := s.privilegedSandbox(req)
// set log path inside log directory
logPath := filepath.Join(logDir, id+".log")
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath(logPath); err != nil {
return nil, err
}
privileged := s.privilegedSandbox(req)
g.AddAnnotation("ocid/metadata", string(metadataJSON))
g.AddAnnotation("ocid/labels", string(labelsJSON))
g.AddAnnotation("ocid/annotations", string(annotationsJSON))
g.AddAnnotation("ocid/log_path", logDir)
g.AddAnnotation("ocid/log_path", logPath)
g.AddAnnotation("ocid/name", name)
g.AddAnnotation("ocid/container_type", containerTypeSandbox)
g.AddAnnotation("ocid/sandbox_id", id)
@ -379,7 +393,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err)
}
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged)
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged)
if err != nil {
return nil, err
}

View file

@ -5,6 +5,7 @@ import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/Sirupsen/logrus"
@ -175,7 +176,7 @@ func (s *Server) loadSandbox(id string) error {
sb := &sandbox{
id: id,
name: name,
logDir: m.Annotations["ocid/log_path"],
logDir: filepath.Dir(m.Annotations["ocid/log_path"]),
labels: labels,
containers: oci.NewMemoryStore(),
processLabel: processLabel,
@ -225,7 +226,7 @@ func (s *Server) loadSandbox(id string) error {
}
}()
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false, privileged)
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged)
if err != nil {
return err
}

View file

@ -99,6 +99,103 @@ function teardown() {
stop_ocid
}
@test "ctr logging" {
start_ocid
run ocic pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic pod list
echo "$output"
[ "$status" -eq 0 ]
# Create a new container.
newconfig=$(mktemp --tmpdir ocid-config.XXXXXX.json)
cp "$TESTDATA"/container_config_logging.json "$newconfig"
sed -i 's|"%echooutput%"|"here", "is", "some", "output"|' "$newconfig"
run ocic ctr create --config "$newconfig" --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr stop --id "$ctr_id"
echo "$output"
# Ignore errors on stop.
run ocic ctr status --id "$ctr_id"
[ "$status" -eq 0 ]
run ocic ctr remove --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
# Check that the output is what we expect.
run cat "$DEFAULT_LOG_PATH/$pod_id/$ctr_id.log"
echo "$DEFAULT_LOG_PATH/$pod_id/$ctr_id.log ::" "$output"
[ "$status" -eq 0 ]
[[ "$output" == *"here is some output" ]]
run ocic pod stop --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic pod remove --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_ocid
}
@test "ctr logging [tty=true]" {
start_ocid
run ocic pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic pod list
echo "$output"
[ "$status" -eq 0 ]
# Create a new container.
newconfig=$(mktemp --tmpdir ocid-config.XXXXXX.json)
cp "$TESTDATA"/container_config_logging.json "$newconfig"
sed -i 's|"%echooutput%"|"here", "is", "some", "output"|' "$newconfig"
sed -i 's|"tty": false,|"tty": true,|' "$newconfig"
run ocic ctr create --config "$newconfig" --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr stop --id "$ctr_id"
echo "$output"
# Ignore errors on stop.
run ocic ctr status --id "$ctr_id"
[ "$status" -eq 0 ]
run ocic ctr remove --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
# Check that the output is what we expect.
run cat "$DEFAULT_LOG_PATH/$pod_id/$ctr_id.log"
echo "$DEFAULT_LOG_PATH/$pod_id/$ctr_id.log ::" "$output"
[ "$status" -eq 0 ]
[[ "$output" == *"here is some output" ]]
run ocic pod stop --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic pod remove --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# regression test for #127
@test "ctrs status for a pod" {
start_ocid

View file

@ -45,6 +45,8 @@ COPYIMG_BINARY=${COPYIMG_BINARY:-${OCID_ROOT}/cri-o/test/copyimg/copyimg}
ARTIFACTS_PATH=${ARTIFACTS_PATH:-${OCID_ROOT}/cri-o/.artifacts}
# Path of the checkseccomp binary.
CHECKSECCOMP_BINARY=${CHECKSECCOMP_BINARY:-${OCID_ROOT}/cri-o/test/checkseccomp/checkseccomp}
# XXX: This is hardcoded inside cri-o at the moment.
DEFAULT_LOG_PATH=/var/log/ocid/pods
TESTDIR=$(mktemp -d)
if [ -e /usr/sbin/selinuxenabled ] && /usr/sbin/selinuxenabled; then
@ -75,6 +77,16 @@ if ! [ -d "$ARTIFACTS_PATH"/redis-image ]; then
fi
fi
# Make sure we have a copy of the busybox:latest image.
if ! [ -d "$ARTIFACTS_PATH"/busybox-image ]; then
mkdir -p "$ARTIFACTS_PATH"/busybox-image
if ! "$COPYIMG_BINARY" --import-from=docker://busybox --export-to=dir:"$ARTIFACTS_PATH"/busybox-image --signature-policy="$INTEGRATION_ROOT"/policy.json ; then
echo "Error pulling docker://busybox"
rm -fr "$ARTIFACTS_PATH"/busybox-image
exit 1
fi
fi
# Run ocid using the binary specified by $OCID_BINARY.
# This must ONLY be run on engines created with `start_ocid`.
function ocid() {
@ -145,6 +157,11 @@ function start_ocid() {
ocic image pull redis:latest
fi
REDIS_IMAGEID=$(ocic image status --id=redis | head -1 | sed -e "s/ID: //g")
run ocic image status --id=busybox
if [ "$status" -ne 0 ] ; then
ocic image pull busybox:latest
fi
BUSYBOX_IMAGEID=$(ocic image status --id=busybox | head -1 | sed -e "s/ID: //g")
}
function cleanup_ctrs() {

View file

@ -8,7 +8,7 @@ function teardown() {
@test "ocic runtimeversion" {
start_ocid
ocic runtimeversion
run ocic runtimeversion
echo "$output"
[ "$status" -eq 0 ]
stop_ocid

View file

@ -7,11 +7,9 @@
"image": "docker://redis:latest"
},
"command": [
"/bin/bash"
],
"args": [
"/bin/ls"
],
"args": [],
"working_dir": "/",
"envs": [
{
@ -41,7 +39,7 @@
},
"privileged": true,
"readonly_rootfs": true,
"log_path": "container.log",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,

View file

@ -41,7 +41,7 @@
},
"privileged": true,
"readonly_rootfs": true,
"log_path": "container.log",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,

View file

@ -0,0 +1,82 @@
{
"metadata": {
"name": "container1",
"attempt": 1
},
"image": {
"image": "docker://busybox:latest"
},
"command": [
"/bin/echo"
],
"args": [
"%echooutput%"
],
"working_dir": "/",
"envs": [
{
"key": "PATH",
"value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
},
{
"key": "TERM",
"value": "xterm"
},
{
"key": "TESTDIR",
"value": "test/dir1"
},
{
"key": "TESTFILE",
"value": "test/file1"
}
],
"labels": {
"type": "small",
"batch": "no"
},
"annotations": {
"owner": "dragon",
"daemon": "ocid"
},
"privileged": true,
"readonly_rootfs": true,
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,
"linux": {
"resources": {
"cpu_period": 10000,
"cpu_quota": 20000,
"cpu_shares": 512,
"memory_limit_in_bytes": 88000000,
"oom_score_adj": 30
},
"capabilities": {
"add_capabilities": [
"setuid",
"setgid"
],
"drop_capabilities": [
"audit_write",
"audit_read"
]
},
"selinux_options": {
"user": "system_u",
"role": "system_r",
"type": "container_t",
"level": "s0:c4,c5"
},
"user": {
"uid": 5,
"gid": 300,
"additional_gids": [
400,
401,
402
]
}
}
}

View file

@ -41,7 +41,7 @@
},
"privileged": true,
"readonly_rootfs": true,
"log_path": "container.log",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,

View file

@ -15,7 +15,7 @@
}
],
"readonly_rootfs": true,
"log_path": "container.log",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,

View file

@ -39,7 +39,7 @@
"pod": "podsandbox1"
},
"readonly_rootfs": false,
"log_path": "container.log",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,

View file

@ -6,7 +6,7 @@
"attempt": 1
},
"hostname": "ocic_host",
"log_directory": ".",
"log_directory": "",
"dns_options": {
"servers": [
"server1.redhat.com",

View file

@ -6,7 +6,7 @@
"attempt": 1
},
"hostname": "ocic_host",
"log_directory": ".",
"log_directory": "",
"dns_options": {
"servers": [
"server1.redhat.com",

View file

@ -6,7 +6,7 @@
"attempt": 1
},
"hostname": "ocic_host",
"log_directory": ".",
"log_directory": "",
"dns_options": {
"servers": [
"server1.redhat.com",