Update runc to ce450bcc6c135cae93ee2a99d41a308c179ff6dc
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
e09b0b0c35
commit
271cac8634
13 changed files with 1254 additions and 5 deletions
|
@ -1,5 +1,5 @@
|
||||||
# go-runc client for runc; master as of 01/20/2017
|
# go-runc client for runc; master as of 01/20/2017
|
||||||
github.com/crosbymichael/go-runc afca56d262e694d9056e937a0877a39ab879aeb4
|
github.com/crosbymichael/go-runc 7b66c5da30493c5eb9c655cab67ba88071891ac5
|
||||||
# go-metrics client to prometheus; master as of 12/16/2016
|
# go-metrics client to prometheus; master as of 12/16/2016
|
||||||
github.com/docker/go-metrics 0f35294225552d968a13f9c5bc71a3fa44b2eb87
|
github.com/docker/go-metrics 0f35294225552d968a13f9c5bc71a3fa44b2eb87
|
||||||
# prometheus client; latest release as of 12/16/2016
|
# prometheus client; latest release as of 12/16/2016
|
||||||
|
@ -31,7 +31,7 @@ github.com/nats-io/go-nats-streaming v0.3.4
|
||||||
# gnatsd; latest release as of 12/16/2016
|
# gnatsd; latest release as of 12/16/2016
|
||||||
github.com/nats-io/gnatsd v0.9.6
|
github.com/nats-io/gnatsd v0.9.6
|
||||||
# runc, latest release as of 12/16/2016
|
# runc, pinned to a specific commit (previously release v1.0.0-rc2)
|
||||||
github.com/opencontainers/runc v1.0.0-rc2
|
github.com/opencontainers/runc ce450bcc6c135cae93ee2a99d41a308c179ff6dc
|
||||||
# OCI runtime spec, latest release as of 12/16/2016
|
# OCI runtime spec, latest release as of 12/16/2016
|
||||||
github.com/opencontainers/runtime-spec v1.0.0-rc3
|
github.com/opencontainers/runtime-spec v1.0.0-rc3
|
||||||
# logrus, latest release as of 12/16/2016
|
# logrus, latest release as of 12/16/2016
|
||||||
|
|
32
vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
generated
vendored
Normal file
32
vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
generated
vendored
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
#ifndef NSENTER_NAMESPACE_H
|
||||||
|
#define NSENTER_NAMESPACE_H
|
||||||
|
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
# define _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
#include <sched.h>
|
||||||
|
|
||||||
|
/* All of these are taken from include/uapi/linux/sched.h */
|
||||||
|
#ifndef CLONE_NEWNS
|
||||||
|
# define CLONE_NEWNS 0x00020000 /* New mount namespace group */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWCGROUP
|
||||||
|
# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWUTS
|
||||||
|
# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWIPC
|
||||||
|
# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWUSER
|
||||||
|
# define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWPID
|
||||||
|
# define CLONE_NEWPID 0x20000000 /* New pid namespace */
|
||||||
|
#endif
|
||||||
|
#ifndef CLONE_NEWNET
|
||||||
|
# define CLONE_NEWNET 0x40000000 /* New network namespace */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* NSENTER_NAMESPACE_H */
|
12
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
generated
vendored
Normal file
12
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
// +build linux,!gccgo
|
||||||
|
|
||||||
|
package nsenter
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo CFLAGS: -Wall
|
||||||
|
extern void nsexec();
|
||||||
|
void __attribute__((constructor)) init(void) {
|
||||||
|
nsexec();
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
import "C"
|
25
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
generated
vendored
Normal file
25
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
// +build linux,gccgo
|
||||||
|
|
||||||
|
package nsenter
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo CFLAGS: -Wall
|
||||||
|
extern void nsexec();
|
||||||
|
void __attribute__((constructor)) init(void) {
|
||||||
|
nsexec();
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
// AlwaysFalse is here to stay false
|
||||||
|
// (and be exported so the compiler doesn't optimize out its reference)
|
||||||
|
var AlwaysFalse bool
|
||||||
|
|
||||||
|
// init never calls into C at run time: AlwaysFalse keeps its zero value, so
// the branch below is not taken. The call exists only so that the C init()
// constructor declared in the cgo preamble is referenced from Go code.
func init() {
	if AlwaysFalse {
		// by referencing this C init() in a noop test, it will ensure the compiler
		// links in the C function.
		// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
		C.init()
	}
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
// +build !linux !cgo
|
||||||
|
|
||||||
|
package nsenter
|
||||||
|
|
||||||
|
import "C"
|
759
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
Normal file
759
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
Normal file
|
@ -0,0 +1,759 @@
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <endian.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <grp.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <setjmp.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/prctl.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <linux/limits.h>
|
||||||
|
#include <linux/netlink.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/* Get all of the CLONE_NEW* flags. */
|
||||||
|
#include "namespace.h"
|
||||||
|
|
||||||
|
/*
 * Synchronisation values.
 *
 * These are exchanged over the socketpair created in nsexec(): the *_PLS
 * values and SYNC_CHILD_READY are written by the children, the *_ACK values
 * are written back by the parent.
 */
enum sync_t {
	SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
	SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
	SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
	SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
	SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */

	/* XXX: This doesn't help with segfaults and other such issues. */
	SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
};
|
||||||
|
|
||||||
|
/* longjmp() arguments. */
|
||||||
|
#define JUMP_PARENT 0x00
|
||||||
|
#define JUMP_CHILD 0xA0
|
||||||
|
#define JUMP_INIT 0xA1
|
||||||
|
|
||||||
|
/* JSON buffer. */
|
||||||
|
#define JSON_MAX 4096
|
||||||
|
|
||||||
|
/*
 * Argument block handed to clone(2) by clone_parent().
 *
 * Assume the stack grows down, so arguments should be above it.
 */
struct clone_t {
	/*
	 * Reserve some space for clone() to locate arguments
	 * and retcode in this place
	 */
	char stack[4096] __attribute__ ((aligned(16)));
	/*
	 * Zero-length marker placed directly after stack[]; its address is what
	 * clone_parent() passes to clone() as the child stack pointer.
	 */
	char stack_ptr[0];

	/*
	 * There are two children. These select which code the new process runs:
	 * child_func() does longjmp(*env, jmpval).
	 */
	jmp_buf *env;
	int jmpval;
};
|
||||||
|
|
||||||
|
/*
 * Bootstrap configuration decoded from the netlink message by nl_parse().
 * The pointer members other than data point into the data buffer and are not
 * separately allocated; nl_free() releases data.
 */
struct nlconfig_t {
	/* Heap buffer holding the raw netlink payload. */
	char *data;
	/* Value of CLONE_FLAGS_ATTR; passed to unshare(2) in nsexec(). */
	uint32_t cloneflags;
	/* Payload of UIDMAP_ATTR and its length in bytes. */
	char *uidmap;
	size_t uidmap_len;
	/* Payload of GIDMAP_ATTR and its length in bytes. */
	char *gidmap;
	size_t gidmap_len;
	/* Payload of NS_PATHS_ATTR and its length in bytes. */
	char *namespaces;
	size_t namespaces_len;
	/* Value of SETGROUP_ATTR; non-zero makes the parent allow setgroups(2). */
	uint8_t is_setgroup;
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* List of netlink message types sent to us as part of bootstrapping the init.
|
||||||
|
* These constants are defined in libcontainer/message_linux.go.
|
||||||
|
*/
|
||||||
|
#define INIT_MSG 62000
|
||||||
|
#define CLONE_FLAGS_ATTR 27281
|
||||||
|
#define NS_PATHS_ATTR 27282
|
||||||
|
#define UIDMAP_ATTR 27283
|
||||||
|
#define GIDMAP_ATTR 27284
|
||||||
|
#define SETGROUP_ATTR 27285
|
||||||
|
|
||||||
|
/*
 * Use the raw syscall for versions of glibc which don't include a function for
 * it, namely (glibc 2.12).
 *
 * The wrapper below is only compiled for glibc 2.x older than 2.14; the build
 * fails with an #error if no syscall number for setns is available.
 */
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
# define _GNU_SOURCE
# include "syscall.h"
# if !defined(SYS_setns) && defined(__NR_setns)
# define SYS_setns __NR_setns
# endif

#ifndef SYS_setns
# error "setns(2) syscall not supported by glibc version"
#endif

/* Thin wrapper that forwards fd and nstype to the setns syscall. */
int setns(int fd, int nstype)
{
	return syscall(SYS_setns, fd, nstype);
}
#endif
|
||||||
|
|
||||||
|
/* XXX: This is ugly. */
|
||||||
|
static int syncfd = -1;
|
||||||
|
|
||||||
|
/* TODO(cyphar): Fix this so it correctly deals with syncT. */
/*
 * bail(fmt, ...) reports a fatal error and terminates the process.
 *
 * It prints "nsenter: <message>: <errno string>" to stderr (via %m). If a
 * variable named syncfd in scope at the call site is >= 0, it then writes
 * SYNC_ERR followed by the exit code to that descriptor so the peer can
 * mirror the failure. The exit code is __COUNTER__ + 1, so it is non-zero and
 * differs between bail() call sites.
 */
#define bail(fmt, ...)								\
	do {									\
		int ret = __COUNTER__ + 1;					\
		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__);	\
		if (syncfd >= 0) {						\
			enum sync_t s = SYNC_ERR;				\
			if (write(syncfd, &s, sizeof(s)) != sizeof(s))		\
				fprintf(stderr, "nsenter: failed: write(s)");	\
			if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret))	\
				fprintf(stderr, "nsenter: failed: write(ret)");	\
		}								\
		exit(ret);							\
	} while(0)
|
||||||
|
|
||||||
|
/*
 * Write data_len bytes from data to the file named by the printf-style
 * pathfmt and its arguments. The file is opened O_RDWR and is not created.
 *
 * Returns 0 on success and -1 on failure. On failure errno is left describing
 * the open(2)/write(2) error (callers such as update_setgroups() inspect it);
 * it is never overwritten by the cleanup close(2).
 */
static int write_file(char *data, size_t data_len, char *pathfmt, ...)
{
	int fd, len, saved_errno;
	ssize_t written;
	char path[PATH_MAX];
	va_list ap;

	va_start(ap, pathfmt);
	len = vsnprintf(path, PATH_MAX, pathfmt, ap);
	va_end(ap);
	if (len < 0)
		return -1;
	/* vsnprintf() reports the untruncated length; refuse a cut-off path. */
	if (len >= PATH_MAX) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/*
	 * Return directly on failure: calling close(-1) here would replace the
	 * real error (e.g. ENOENT) with EBADF.
	 */
	fd = open(path, O_RDWR);
	if (fd < 0)
		return -1;

	written = write(fd, data, data_len);
	if (written < 0 || (size_t)written != data_len) {
		/* Preserve errno from write(2) across close(2). */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}

	close(fd);
	return 0;
}
|
||||||
|
|
||||||
|
/* setgroups policy selector consumed by update_setgroups(). */
enum policy_t {
	SETGROUPS_DEFAULT = 0,	/* Leave /proc/<pid>/setgroups untouched. */
	SETGROUPS_ALLOW,	/* Write "allow". */
	SETGROUPS_DENY,		/* Write "deny". */
};
|
||||||
|
|
||||||
|
/* This *must* be called before we touch gid_map. */
|
||||||
|
static void update_setgroups(int pid, enum policy_t setgroup)
|
||||||
|
{
|
||||||
|
char *policy;
|
||||||
|
|
||||||
|
switch (setgroup) {
|
||||||
|
case SETGROUPS_ALLOW:
|
||||||
|
policy = "allow";
|
||||||
|
break;
|
||||||
|
case SETGROUPS_DENY:
|
||||||
|
policy = "deny";
|
||||||
|
break;
|
||||||
|
case SETGROUPS_DEFAULT:
|
||||||
|
/* Nothing to do. */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) {
|
||||||
|
/*
|
||||||
|
* If the kernel is too old to support /proc/pid/setgroups,
|
||||||
|
* open(2) or write(2) will return ENOENT. This is fine.
|
||||||
|
*/
|
||||||
|
if (errno != ENOENT)
|
||||||
|
bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Write the uid mapping for pid to /proc/<pid>/uid_map.
 * Does nothing when no mapping was supplied; a failed write is fatal.
 */
static void update_uidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/uid_map", pid) < 0)
		bail("failed to update /proc/%d/uid_map", pid);
}
|
||||||
|
|
||||||
|
/*
 * Write the gid mapping for pid to /proc/<pid>/gid_map.
 * Does nothing when no mapping was supplied; a failed write is fatal.
 */
static void update_gidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/gid_map", pid) < 0)
		bail("failed to update /proc/%d/gid_map", pid);
}
|
||||||
|
|
||||||
|
/* A dummy function that just jumps to the given jumpval. */
|
||||||
|
static int child_func(void *arg) __attribute__ ((noinline));
|
||||||
|
static int child_func(void *arg)
|
||||||
|
{
|
||||||
|
struct clone_t *ca = (struct clone_t *)arg;
|
||||||
|
longjmp(*ca->env, ca->jmpval);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
|
||||||
|
static int clone_parent(jmp_buf *env, int jmpval)
|
||||||
|
{
|
||||||
|
struct clone_t ca = {
|
||||||
|
.env = env,
|
||||||
|
.jmpval = jmpval,
|
||||||
|
};
|
||||||
|
|
||||||
|
return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Look up the init pipe fd in the _LIBCONTAINER_INITPIPE environment
 * variable. The pipe is used to read the bootstrap data and to tell the
 * parent what the new pid is once the environment is set up.
 *
 * Returns -1 when the variable is unset or empty; bails out when its value
 * has trailing non-numeric characters.
 */
static int initpipe(void)
{
	char *value = getenv("_LIBCONTAINER_INITPIPE");
	char *end;
	int fd;

	if (value == NULL || value[0] == '\0')
		return -1;

	fd = strtol(value, &end, 10);
	if (end[0] != '\0')
		bail("unable to parse _LIBCONTAINER_INITPIPE");

	return fd;
}
|
||||||
|
|
||||||
|
/*
 * Map a namespace name ("cgroup", "ipc", "mnt", "net", "pid", "user", "uts")
 * to its clone(2) flag. Unrecognised names map to 0.
 */
static int nsflag(char *name)
{
	static const struct {
		const char *name;
		int flag;
	} known[] = {
		{ "cgroup", CLONE_NEWCGROUP },
		{ "ipc", CLONE_NEWIPC },
		{ "mnt", CLONE_NEWNS },
		{ "net", CLONE_NEWNET },
		{ "pid", CLONE_NEWPID },
		{ "user", CLONE_NEWUSER },
		{ "uts", CLONE_NEWUTS },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(name, known[idx].name) == 0)
			return known[idx].flag;
	}

	/* No match: the caller gets 0. */
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Read a host-endian 32-bit unsigned value from the first four bytes of buf.
 * memcpy() is used instead of a pointer cast so the read is valid for any
 * alignment of buf.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
|
||||||
|
|
||||||
|
/* Return the first byte of buf as an unsigned 8-bit value. */
static uint8_t readint8(char *buf)
{
	uint8_t value = (uint8_t) buf[0];

	return value;
}
|
||||||
|
|
||||||
|
static void nl_parse(int fd, struct nlconfig_t *config)
|
||||||
|
{
|
||||||
|
size_t len, size;
|
||||||
|
struct nlmsghdr hdr;
|
||||||
|
char *data, *current;
|
||||||
|
|
||||||
|
/* Retrieve the netlink header. */
|
||||||
|
len = read(fd, &hdr, NLMSG_HDRLEN);
|
||||||
|
if (len != NLMSG_HDRLEN)
|
||||||
|
bail("invalid netlink header length %lu", len);
|
||||||
|
|
||||||
|
if (hdr.nlmsg_type == NLMSG_ERROR)
|
||||||
|
bail("failed to read netlink message");
|
||||||
|
|
||||||
|
if (hdr.nlmsg_type != INIT_MSG)
|
||||||
|
bail("unexpected msg type %d", hdr.nlmsg_type);
|
||||||
|
|
||||||
|
/* Retrieve data. */
|
||||||
|
size = NLMSG_PAYLOAD(&hdr, 0);
|
||||||
|
current = data = malloc(size);
|
||||||
|
if (!data)
|
||||||
|
bail("failed to allocate %zu bytes of memory for nl_payload", size);
|
||||||
|
|
||||||
|
len = read(fd, data, size);
|
||||||
|
if (len != size)
|
||||||
|
bail("failed to read netlink payload, %lu != %lu", len, size);
|
||||||
|
|
||||||
|
/* Parse the netlink payload. */
|
||||||
|
config->data = data;
|
||||||
|
while (current < data + size) {
|
||||||
|
struct nlattr *nlattr = (struct nlattr *)current;
|
||||||
|
size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
|
||||||
|
|
||||||
|
/* Advance to payload. */
|
||||||
|
current += NLA_HDRLEN;
|
||||||
|
|
||||||
|
/* Handle payload. */
|
||||||
|
switch (nlattr->nla_type) {
|
||||||
|
case CLONE_FLAGS_ATTR:
|
||||||
|
config->cloneflags = readint32(current);
|
||||||
|
break;
|
||||||
|
case NS_PATHS_ATTR:
|
||||||
|
config->namespaces = current;
|
||||||
|
config->namespaces_len = payload_len;
|
||||||
|
break;
|
||||||
|
case UIDMAP_ATTR:
|
||||||
|
config->uidmap = current;
|
||||||
|
config->uidmap_len = payload_len;
|
||||||
|
break;
|
||||||
|
case GIDMAP_ATTR:
|
||||||
|
config->gidmap = current;
|
||||||
|
config->gidmap_len = payload_len;
|
||||||
|
break;
|
||||||
|
case SETGROUP_ATTR:
|
||||||
|
config->is_setgroup = readint8(current);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
bail("unknown netlink message type %d", nlattr->nla_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
current += NLA_ALIGN(payload_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void nl_free(struct nlconfig_t *config)
|
||||||
|
{
|
||||||
|
free(config->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Join every namespace listed in nslist, a comma-separated list of
 * "type:path" entries (e.g. "net:/proc/1/ns/net,mnt:/proc/1/ns/mnt").
 *
 * nslist is modified in place (it is tokenised). All paths are opened before
 * any setns(2) call is made, and the namespaces are then joined in the order
 * given. Any failure is fatal (bail()).
 */
void join_namespaces(char *nslist)
{
	int num = 0, i;
	char *saveptr = NULL;
	char *namespace = strtok_r(nslist, ",", &saveptr);
	struct namespace_t {
		int fd;
		int ns;
		char path[PATH_MAX];
	} *namespaces = NULL;

	if (!namespace || !strlen(namespace) || !strlen(nslist))
		bail("ns paths are empty");

	/*
	 * We have to open the file descriptors first, since after
	 * we join the mnt namespace we might no longer be able to
	 * access the paths.
	 */
	do {
		int fd;
		char *path;
		struct namespace_t *ns;

		/* Resize the namespace array. */
		namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
		if (!namespaces)
			bail("failed to reallocate namespace array");
		ns = &namespaces[num - 1];

		/* Split 'ns:path'. */
		path = strstr(namespace, ":");
		if (!path)
			bail("failed to parse %s", namespace);
		*path++ = '\0';

		fd = open(path, O_RDONLY);
		if (fd < 0)
			bail("failed to open %s", path);

		ns->fd = fd;
		ns->ns = nsflag(namespace);
		/*
		 * strncpy() does not terminate the destination when the source
		 * fills it, and the path is later printed with %s.
		 */
		strncpy(ns->path, path, PATH_MAX - 1);
		ns->path[PATH_MAX - 1] = '\0';
	} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);

	/*
	 * The ordering in which we join namespaces is important. We should
	 * always join the user namespace *first*. This is all guaranteed
	 * from the container_linux.go side of this, so we're just going to
	 * follow the order given to us.
	 */

	for (i = 0; i < num; i++) {
		/* Use a pointer: each entry embeds a PATH_MAX-sized buffer. */
		struct namespace_t *ns = &namespaces[i];

		if (setns(ns->fd, ns->ns) < 0)
			bail("failed to setns to %s", ns->path);

		close(ns->fd);
	}

	free(namespaces);
}
|
||||||
|
|
||||||
|
void nsexec(void)
|
||||||
|
{
|
||||||
|
int pipenum;
|
||||||
|
jmp_buf env;
|
||||||
|
int syncpipe[2];
|
||||||
|
struct nlconfig_t config = {0};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we don't have an init pipe, just return to the go routine.
|
||||||
|
* We'll only get an init pipe for start or exec.
|
||||||
|
*/
|
||||||
|
pipenum = initpipe();
|
||||||
|
if (pipenum == -1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* make the process non-dumpable */
|
||||||
|
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
|
||||||
|
bail("failed to set process as non-dumpable");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse all of the netlink configuration. */
|
||||||
|
nl_parse(pipenum, &config);
|
||||||
|
|
||||||
|
/* Pipe so we can tell the child when we've finished setting up. */
|
||||||
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
|
||||||
|
bail("failed to setup sync pipe between parent and child");
|
||||||
|
|
||||||
|
/* TODO: Currently we aren't dealing with child deaths properly. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Okay, so this is quite annoying.
|
||||||
|
*
|
||||||
|
* In order for this unsharing code to be more extensible we need to split
|
||||||
|
* up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case
|
||||||
|
* would be if we did clone(CLONE_NEWUSER) and the other namespaces
|
||||||
|
* separately, but because of SELinux issues we cannot really do that. But
|
||||||
|
* we cannot just dump the namespace flags into clone(...) because several
|
||||||
|
* usecases (such as rootless containers) require more granularity around
|
||||||
|
* the namespace setup. In addition, some older kernels had issues where
|
||||||
|
* CLONE_NEWUSER wasn't handled before other namespaces (but we cannot
|
||||||
|
* handle this while also dealing with SELinux so we choose SELinux support
|
||||||
|
* over broken kernel support).
|
||||||
|
*
|
||||||
|
* However, if we unshare(2) the user namespace *before* we clone(2), then
|
||||||
|
* all hell breaks loose.
|
||||||
|
*
|
||||||
|
* The parent no longer has permissions to do many things (unshare(2) drops
|
||||||
|
* all capabilities in your old namespace), and the container cannot be set
|
||||||
|
* up to have more than one {uid,gid} mapping. This is obviously less than
|
||||||
|
* ideal. In order to fix this, we have to first clone(2) and then unshare.
|
||||||
|
*
|
||||||
|
* Unfortunately, it's not as simple as that. We have to fork to enter the
|
||||||
|
* PID namespace (the PID namespace only applies to children). Since we'll
|
||||||
|
* have to double-fork, this clone_parent() call won't be able to get the
|
||||||
|
* PID of the _actual_ init process (without doing more synchronisation than
|
||||||
|
* I can deal with at the moment). So we'll just get the parent to send it
|
||||||
|
* for us, the only job of this process is to update
|
||||||
|
* /proc/pid/{setgroups,uid_map,gid_map}.
|
||||||
|
*
|
||||||
|
* And as a result of the above, we also need to setns(2) in the first child
|
||||||
|
* because if we join a PID namespace in the topmost parent then our child
|
||||||
|
* will be in that namespace (and it will not be able to give us a PID value
|
||||||
|
* that makes sense without resorting to sending things with cmsg).
|
||||||
|
*
|
||||||
|
* This also deals with an older issue caused by dumping cloneflags into
|
||||||
|
* clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so
|
||||||
|
* we have to unshare(2) before clone(2) in order to do this. This was fixed
|
||||||
|
* in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was
|
||||||
|
* introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're
|
||||||
|
* aware, the last mainline kernel which had this bug was Linux 3.12.
|
||||||
|
* However, we cannot comment on which kernels the broken patch was
|
||||||
|
* backported to.
|
||||||
|
*
|
||||||
|
* -- Aleksa "what has my life come to?" Sarai
|
||||||
|
*/
|
||||||
|
|
||||||
|
switch (setjmp(env)) {
|
||||||
|
/*
|
||||||
|
* Stage 0: We're in the parent. Our job is just to create a new child
|
||||||
|
* (stage 1: JUMP_CHILD) process and write its uid_map and
|
||||||
|
* gid_map. That process will go on to create a new process, then
|
||||||
|
* it will send us its PID which we will send to the bootstrap
|
||||||
|
* process.
|
||||||
|
*/
|
||||||
|
case JUMP_PARENT: {
|
||||||
|
int len, ready = 0;
|
||||||
|
pid_t child;
|
||||||
|
char buf[JSON_MAX];
|
||||||
|
|
||||||
|
/* For debugging. */
|
||||||
|
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
|
||||||
|
|
||||||
|
/* Start the process of getting a container. */
|
||||||
|
child = clone_parent(&env, JUMP_CHILD);
|
||||||
|
if (child < 0)
|
||||||
|
bail("unable to fork: child_func");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* State machine for synchronisation with the children.
|
||||||
|
*
|
||||||
|
* Father only return when both child and grandchild are
|
||||||
|
* ready, so we can receive all possible error codes
|
||||||
|
* generated by children.
|
||||||
|
*/
|
||||||
|
while (ready < 2) {
|
||||||
|
enum sync_t s;
|
||||||
|
|
||||||
|
/* This doesn't need to be global, we're in the parent. */
|
||||||
|
int syncfd = syncpipe[1];
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
|
switch (s) {
|
||||||
|
case SYNC_ERR: {
|
||||||
|
/* We have to mirror the error code of the child. */
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||||
|
bail("failed to sync with child: read(error code)");
|
||||||
|
|
||||||
|
exit(ret);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SYNC_USERMAP_PLS:
|
||||||
|
/* Enable setgroups(2) if we've been asked to. */
|
||||||
|
if (config.is_setgroup)
|
||||||
|
update_setgroups(child, SETGROUPS_ALLOW);
|
||||||
|
|
||||||
|
/* Set up mappings. */
|
||||||
|
update_uidmap(child, config.uidmap, config.uidmap_len);
|
||||||
|
update_gidmap(child, config.gidmap, config.gidmap_len);
|
||||||
|
|
||||||
|
s = SYNC_USERMAP_ACK;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SYNC_USERMAP_ACK:
|
||||||
|
/* We should _never_ receive acks. */
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
|
||||||
|
break;
|
||||||
|
case SYNC_RECVPID_PLS: {
|
||||||
|
pid_t old = child;
|
||||||
|
|
||||||
|
/* Get the init_func pid. */
|
||||||
|
if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||||
|
kill(old, SIGKILL);
|
||||||
|
bail("failed to sync with child: read(childpid)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send ACK. */
|
||||||
|
s = SYNC_RECVPID_ACK;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(old, SIGKILL);
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ready++;
|
||||||
|
break;
|
||||||
|
case SYNC_RECVPID_ACK:
|
||||||
|
/* We should _never_ receive acks. */
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
|
||||||
|
break;
|
||||||
|
case SYNC_CHILD_READY:
|
||||||
|
ready++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
bail("unexpected sync value");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send the init_func pid back to our parent. */
|
||||||
|
len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child);
|
||||||
|
if (len < 0) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("unable to generate JSON for child pid");
|
||||||
|
}
|
||||||
|
if (write(pipenum, buf, len) != len) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("unable to send child pid to bootstrapper");
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stage 1: We're in the first child process. Our job is to join any
|
||||||
|
* provided namespaces in the netlink payload and unshare all
|
||||||
|
* of the requested namespaces. If we've been asked to
|
||||||
|
* CLONE_NEWUSER, we will ask our parent (stage 0) to set up
|
||||||
|
* our user mappings for us. Then, we create a new child
|
||||||
|
* (stage 2: JUMP_INIT) for PID namespace. We then send the
|
||||||
|
* child's PID to our parent (stage 0).
|
||||||
|
*/
|
||||||
|
case JUMP_CHILD: {
|
||||||
|
pid_t child;
|
||||||
|
enum sync_t s;
|
||||||
|
|
||||||
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
|
syncfd = syncpipe[0];
|
||||||
|
|
||||||
|
/* For debugging. */
|
||||||
|
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to setns first. We cannot do this earlier (in stage 0)
|
||||||
|
* because of the fact that we forked to get here (the PID of
|
||||||
|
* [stage 2: JUMP_INIT]) would be meaningless). We could send it
|
||||||
|
* using cmsg(3) but that's just annoying.
|
||||||
|
*/
|
||||||
|
if (config.namespaces)
|
||||||
|
join_namespaces(config.namespaces);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unshare all of the namespaces. Now, it should be noted that this
|
||||||
|
* ordering might break in the future (especially with rootless
|
||||||
|
* containers). But for now, it's not possible to split this into
|
||||||
|
* CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
|
||||||
|
*
|
||||||
|
* Note that we don't merge this with clone() because there were
|
||||||
|
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
||||||
|
* was broken, so we'll just do it the long way anyway.
|
||||||
|
*/
|
||||||
|
if (unshare(config.cloneflags) < 0)
|
||||||
|
bail("failed to unshare namespaces");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Deal with user namespaces first. They are quite special, as they
|
||||||
|
* affect our ability to unshare other namespaces and are used as
|
||||||
|
* context for privilege checks.
|
||||||
|
*/
|
||||||
|
if (config.cloneflags & CLONE_NEWUSER) {
|
||||||
|
/*
|
||||||
|
* We don't have the privileges to do any mapping here (see the
|
||||||
|
* clone_parent rant). So signal our parent to hook us up.
|
||||||
|
*/
|
||||||
|
|
||||||
|
s = SYNC_USERMAP_PLS;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||||
|
|
||||||
|
/* ... wait for mapping ... */
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||||
|
if (s != SYNC_USERMAP_ACK)
|
||||||
|
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: What about non-namespace clone flags that we're dropping here?
|
||||||
|
*
|
||||||
|
* We fork again because of PID namespace, setns(2) or unshare(2) don't
|
||||||
|
* change the PID namespace of the calling process, because doing so
|
||||||
|
* would change the caller's idea of its own PID (as reported by getpid()),
|
||||||
|
* which would break many applications and libraries, so we must fork
|
||||||
|
* to actually enter the new PID namespace.
|
||||||
|
*/
|
||||||
|
child = clone_parent(&env, JUMP_INIT);
|
||||||
|
if (child < 0)
|
||||||
|
bail("unable to fork: init_func");
|
||||||
|
|
||||||
|
/* Send the child to our parent, which knows what it's doing. */
|
||||||
|
s = SYNC_RECVPID_PLS;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
|
||||||
|
}
|
||||||
|
if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: write(childpid)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ... wait for parent to get the pid ... */
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
|
||||||
|
}
|
||||||
|
if (s != SYNC_RECVPID_ACK) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stage 2: We're the final child process, and the only process that will
|
||||||
|
* actually return to the Go runtime. Our job is to just do the
|
||||||
|
* final cleanup steps and then return to the Go runtime to allow
|
||||||
|
* init_linux.go to run.
|
||||||
|
*/
|
||||||
|
case JUMP_INIT: {
|
||||||
|
/*
|
||||||
|
* We're inside the child now, having jumped from the
|
||||||
|
* start_child() code after forking in the parent.
|
||||||
|
*/
|
||||||
|
enum sync_t s;
|
||||||
|
|
||||||
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
|
syncfd = syncpipe[0];
|
||||||
|
|
||||||
|
/* For debugging. */
|
||||||
|
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
|
||||||
|
|
||||||
|
if (setsid() < 0)
|
||||||
|
bail("setsid failed");
|
||||||
|
|
||||||
|
if (setuid(0) < 0)
|
||||||
|
bail("setuid failed");
|
||||||
|
|
||||||
|
if (setgid(0) < 0)
|
||||||
|
bail("setgid failed");
|
||||||
|
|
||||||
|
if (setgroups(0, NULL) < 0)
|
||||||
|
bail("setgroups failed");
|
||||||
|
|
||||||
|
s = SYNC_CHILD_READY;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||||
|
|
||||||
|
/* Close sync pipes. */
|
||||||
|
close(syncpipe[0]);
|
||||||
|
close(syncpipe[1]);
|
||||||
|
|
||||||
|
/* Free netlink data. */
|
||||||
|
nl_free(&config);
|
||||||
|
|
||||||
|
/* Finish executing, let the Go runtime take over. */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
bail("unexpected jump value");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Should never be reached. */
|
||||||
|
bail("should never be reached");
|
||||||
|
}
|
20
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
|
@ -14,8 +14,10 @@ func GetProcessStartTime(pid int) (string, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
return parseStartTime(string(data))
|
||||||
|
}
|
||||||
|
|
||||||
parts := strings.Split(string(data), " ")
|
func parseStartTime(stat string) (string, error) {
|
||||||
// the starttime is located at pos 22
|
// the starttime is located at pos 22
|
||||||
// from the man page
|
// from the man page
|
||||||
//
|
//
|
||||||
|
@ -23,5 +25,19 @@ func GetProcessStartTime(pid int) (string, error) {
|
||||||
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
|
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
|
||||||
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
|
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
|
||||||
// (divide by sysconf(_SC_CLK_TCK)).
|
// (divide by sysconf(_SC_CLK_TCK)).
|
||||||
return parts[22-1], nil // starts at 1
|
//
|
||||||
|
// NOTE:
|
||||||
|
// pos 2 could contain space and is inside `(` and `)`:
|
||||||
|
// (2) comm %s
|
||||||
|
// The filename of the executable, in parentheses.
|
||||||
|
// This is visible whether or not the executable is
|
||||||
|
// swapped out.
|
||||||
|
//
|
||||||
|
// the following is an example:
|
||||||
|
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||||
|
|
||||||
|
// get parts after last `)`:
|
||||||
|
s := strings.Split(stat, ")")
|
||||||
|
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
|
||||||
|
return parts[22-3], nil // starts at 3 (after the filename pos `2`)
|
||||||
}
|
}
|
||||||
|
|
2
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
|
@ -343,7 +343,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
|
||||||
if len(groups) > 0 {
|
if len(groups) > 0 {
|
||||||
// First match wins, even if there's more than one matching entry.
|
// First match wins, even if there's more than one matching entry.
|
||||||
user.Gid = groups[0].Gid
|
user.Gid = groups[0].Gid
|
||||||
} else if groupArg != "" {
|
} else {
|
||||||
// If we can't find a group with the given name, the only other valid
|
// If we can't find a group with the given name, the only other valid
|
||||||
// option is if it's a numeric group name with no associated entry in group.
|
// option is if it's a numeric group name with no associated entry in group.
|
||||||
|
|
||||||
|
|
148
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
generated
vendored
Normal file
148
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
generated
vendored
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2016 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "cmsg.h"
|
||||||
|
|
||||||
|
/*
 * Report a failure and abandon the current function.
 *
 * Prints "nsenter: <message>: <strerror(errno)>" to stderr (the errno text
 * comes from the %m conversion, so errno is read before it is overwritten),
 * then sets errno to ECOMM and jumps to the `err` label, which the calling
 * function must provide.
 */
#define error(fmt, ...)                                                   \
	do {                                                              \
		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
		errno = ECOMM;                                            \
		goto err;                                                 \
	} while (0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sends a file descriptor along the sockfd provided. Returns the return
|
||||||
|
* value of sendmsg(2). Any synchronisation and preparation of state
|
||||||
|
* should be done external to this (we expect the other side to be in
|
||||||
|
* recvfd() in the code).
|
||||||
|
*/
|
||||||
|
ssize_t sendfd(int sockfd, struct file_t file)
|
||||||
|
{
|
||||||
|
struct msghdr msg = {0};
|
||||||
|
struct iovec iov[1] = {0};
|
||||||
|
struct cmsghdr *cmsg;
|
||||||
|
int *fdptr;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
union {
|
||||||
|
char buf[CMSG_SPACE(sizeof(file.fd))];
|
||||||
|
struct cmsghdr align;
|
||||||
|
} u;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to send some other data along with the ancillary data,
|
||||||
|
* otherwise the other side won't recieve any data. This is very
|
||||||
|
* well-hidden in the documentation (and only applies to
|
||||||
|
* SOCK_STREAM). See the bottom part of unix(7).
|
||||||
|
*/
|
||||||
|
iov[0].iov_base = file.name;
|
||||||
|
iov[0].iov_len = strlen(file.name) + 1;
|
||||||
|
|
||||||
|
msg.msg_name = NULL;
|
||||||
|
msg.msg_namelen = 0;
|
||||||
|
msg.msg_iov = iov;
|
||||||
|
msg.msg_iovlen = 1;
|
||||||
|
msg.msg_control = u.buf;
|
||||||
|
msg.msg_controllen = sizeof(u.buf);
|
||||||
|
|
||||||
|
cmsg = CMSG_FIRSTHDR(&msg);
|
||||||
|
cmsg->cmsg_level = SOL_SOCKET;
|
||||||
|
cmsg->cmsg_type = SCM_RIGHTS;
|
||||||
|
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
|
||||||
|
|
||||||
|
fdptr = (int *) CMSG_DATA(cmsg);
|
||||||
|
memcpy(fdptr, &file.fd, sizeof(int));
|
||||||
|
|
||||||
|
return sendmsg(sockfd, &msg, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Receives a file descriptor from the sockfd provided. Returns the file
|
||||||
|
* descriptor as sent from sendfd(). It will return the file descriptor
|
||||||
|
* or die (literally) trying. Any synchronisation and preparation of
|
||||||
|
* state should be done external to this (we expect the other side to be
|
||||||
|
* in sendfd() in the code).
|
||||||
|
*/
|
||||||
|
struct file_t recvfd(int sockfd)
|
||||||
|
{
|
||||||
|
struct msghdr msg = {0};
|
||||||
|
struct iovec iov[1] = {0};
|
||||||
|
struct cmsghdr *cmsg;
|
||||||
|
struct file_t file = {0};
|
||||||
|
int *fdptr;
|
||||||
|
int olderrno;
|
||||||
|
|
||||||
|
union {
|
||||||
|
char buf[CMSG_SPACE(sizeof(file.fd))];
|
||||||
|
struct cmsghdr align;
|
||||||
|
} u;
|
||||||
|
|
||||||
|
/* Allocate a buffer. */
|
||||||
|
/* TODO: Make this dynamic with MSG_PEEK. */
|
||||||
|
file.name = malloc(TAG_BUFFER);
|
||||||
|
if (!file.name)
|
||||||
|
error("recvfd: failed to allocate file.tag buffer\n");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to "recieve" the non-ancillary data even though we don't
|
||||||
|
* plan to use it at all. Otherwise, things won't work as expected.
|
||||||
|
* See unix(7) and other well-hidden documentation.
|
||||||
|
*/
|
||||||
|
iov[0].iov_base = file.name;
|
||||||
|
iov[0].iov_len = TAG_BUFFER;
|
||||||
|
|
||||||
|
msg.msg_name = NULL;
|
||||||
|
msg.msg_namelen = 0;
|
||||||
|
msg.msg_iov = iov;
|
||||||
|
msg.msg_iovlen = 1;
|
||||||
|
msg.msg_control = u.buf;
|
||||||
|
msg.msg_controllen = sizeof(u.buf);
|
||||||
|
|
||||||
|
ssize_t ret = recvmsg(sockfd, &msg, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
cmsg = CMSG_FIRSTHDR(&msg);
|
||||||
|
if (!cmsg)
|
||||||
|
error("recvfd: got NULL from CMSG_FIRSTHDR");
|
||||||
|
if (cmsg->cmsg_level != SOL_SOCKET)
|
||||||
|
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
|
||||||
|
if (cmsg->cmsg_type != SCM_RIGHTS)
|
||||||
|
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
|
||||||
|
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
|
||||||
|
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
|
||||||
|
|
||||||
|
fdptr = (int *) CMSG_DATA(cmsg);
|
||||||
|
if (!fdptr || *fdptr < 0)
|
||||||
|
error("recvfd: recieved invalid pointer");
|
||||||
|
|
||||||
|
file.fd = *fdptr;
|
||||||
|
return file;
|
||||||
|
|
||||||
|
err:
|
||||||
|
olderrno = errno;
|
||||||
|
free(file.name);
|
||||||
|
errno = olderrno;
|
||||||
|
return (struct file_t){0};
|
||||||
|
}
|
57
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
57
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
// +build linux
|
||||||
|
|
||||||
|
package utils
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright 2016 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cmsg.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
|
||||||
|
// socket. The file name of the remote file descriptor will be recreated
|
||||||
|
// locally (it is sent as non-auxiliary data in the same payload).
|
||||||
|
func RecvFd(socket *os.File) (*os.File, error) {
|
||||||
|
file, err := C.recvfd(C.int(socket.Fd()))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer C.free(unsafe.Pointer(file.name))
|
||||||
|
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendFd sends a file descriptor over the given AF_UNIX socket. In
|
||||||
|
// addition, the file.Name() of the given file will also be sent as
|
||||||
|
// non-auxiliary data in the same payload (allowing to send contextual
|
||||||
|
// information for a file descriptor).
|
||||||
|
func SendFd(socket, file *os.File) error {
|
||||||
|
var cfile C.struct_file_t
|
||||||
|
cfile.fd = C.int(file.Fd())
|
||||||
|
cfile.name = C.CString(file.Name())
|
||||||
|
defer C.free(unsafe.Pointer(cfile.name))
|
||||||
|
|
||||||
|
_, err := C.sendfd(C.int(socket.Fd()), cfile)
|
||||||
|
return err
|
||||||
|
}
|
36
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
generated
vendored
Normal file
36
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2016 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if !defined(CMSG_H)
|
||||||
|
#define CMSG_H
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
/* TODO: Implement this properly with MSG_PEEK. */
|
||||||
|
#define TAG_BUFFER 4096
|
||||||
|
|
||||||
|
/*
 * A (name, descriptor) pair, mirroring the two pieces of Go's (*os.File)
 * that are exchanged over the socket.
 */
struct file_t {
	char *name;	/* heap-allocated by recvfd(); freed by the caller */
	int fd;		/* the file descriptor being passed */
};

/*
 * Receive a descriptor (and its name payload) from sockfd. Returns a
 * zeroed file_t with errno set on failure.
 */
struct file_t recvfd(int sockfd);

/*
 * Send file.fd (with file.name as payload) over sockfd. Returns the
 * result of sendmsg(2).
 */
ssize_t sendfd(int sockfd, struct file_t file);
|
||||||
|
|
||||||
|
#endif /* !defined(CMSG_H) */
|
126
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
126
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/rand"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// exitSignalOffset is added to a terminating signal's number by ExitStatus
// to form the exit status reported for a signaled process.
const exitSignalOffset = 128
|
||||||
|
|
||||||
|
// GenerateRandomName returns prefix followed by the first size characters of
// a freshly generated random hex string. The hex string is 64 characters
// long (32 random bytes), so size is clamped to the range [0, 64]: larger
// values yield the full 64 characters and negative values yield just the
// prefix instead of panicking. An error is returned only if reading from
// the system's random source fails.
func GenerateRandomName(prefix string, size int) (string, error) {
	id := make([]byte, 32)
	if _, err := io.ReadFull(rand.Reader, id); err != nil {
		return "", err
	}

	encoded := hex.EncodeToString(id)
	switch {
	case size < 0:
		// A negative bound would make the slice expression below panic.
		size = 0
	case size > len(encoded):
		size = len(encoded)
	}
	return prefix + encoded[:size], nil
}
|
||||||
|
|
||||||
|
// ResolveRootfs turns uncleanRootfs into an absolute path and then resolves
// every symlink in it, returning the resulting path. Any error from either
// step is returned with an empty path.
func ResolveRootfs(uncleanRootfs string) (string, error) {
	absolute, err := filepath.Abs(uncleanRootfs)
	if err == nil {
		return filepath.EvalSymlinks(absolute)
	}
	return "", err
}
|
||||||
|
|
||||||
|
// ExitStatus returns the correct exit status for a process based on if it
|
||||||
|
// was signaled or exited cleanly
|
||||||
|
func ExitStatus(status syscall.WaitStatus) int {
|
||||||
|
if status.Signaled() {
|
||||||
|
return exitSignalOffset + int(status.Signal())
|
||||||
|
}
|
||||||
|
return status.ExitStatus()
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteJSON marshals v with encoding/json and writes the resulting bytes to
// w in a single Write call. Nothing is written if marshaling fails; the
// marshaling or write error is returned.
func WriteJSON(w io.Writer, v interface{}) error {
	encoded, err := json.Marshal(v)
	if err == nil {
		_, err = w.Write(encoded)
	}
	return err
}
|
||||||
|
|
||||||
|
// CleanPath normalises path so that it can be safely appended to another
// path with filepath.Join. Absolute paths are simply cleaned. Relative
// paths are cleaned as if they were rooted at '/', and then made relative
// again, which strips any leading ".." components; the result therefore
// always stays lexically beneath whatever prefix it is joined to. The
// processing is purely lexical: symlinks are not considered, so a path that
// traverses symlinks is not made safe by CleanPath. An empty path is
// returned unchanged.
func CleanPath(path string) string {
	if path == "" {
		return ""
	}

	// First pass: collapse things like "/../../../../../" early.
	cleaned := filepath.Clean(path)

	// Absolute paths must stay absolute; one more Clean is all they need.
	if filepath.IsAbs(cleaned) {
		return filepath.Clean(cleaned)
	}

	// Relative path such as "../../../../<etc>/some/path": anchor it at the
	// root so ".." cannot climb any further, then strip the anchor again.
	root := string(os.PathSeparator)
	anchored := filepath.Clean(root + cleaned)
	// This can't fail, as (by definition) all paths are relative to root.
	relative, _ := filepath.Rel(root, anchored)

	return filepath.Clean(relative)
}
|
||||||
|
|
||||||
|
// SearchLabels looks through labels, each expected to have the form
// "key=value", and returns the value of the first entry whose key equals
// query. Entries without an '=' are skipped. The empty string is returned
// when no entry matches.
func SearchLabels(labels []string, query string) string {
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			// Not a key=value pair.
			continue
		}
		if label[:sep] == query {
			return label[sep+1:]
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// Annotations splits a list of "key=value" labels into the bundle path and
// the remaining user-defined annotations. The "bundle" label is added by
// libcontainer rather than the user, so its value is returned separately
// and it is left out of the map. Labels without an '=' are ignored; when a
// key occurs more than once the last occurrence wins. The returned map is
// never nil.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
	userAnnotations = map[string]string{}
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			continue
		}
		switch key, value := label[:sep], label[sep+1:]; key {
		case "bundle":
			bundle = value
		default:
			userAnnotations[key] = value
		}
	}
	return bundle, userAnnotations
}
|
||||||
|
|
||||||
|
// GetIntSize returns the size, in bytes, of the Go int type on the platform
// the program was compiled for.
func GetIntSize() int {
	var probe int
	return int(unsafe.Sizeof(probe))
}
|
33
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
33
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"strconv"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CloseExecFrom sets the close-on-exec flag on every file descriptor of the
// current process that is numbered minFd or higher, as listed in
// /proc/self/fd. It returns an error only if that directory cannot be read.
func CloseExecFrom(minFd int) error {
	entries, err := ioutil.ReadDir("/proc/self/fd")
	if err != nil {
		return err
	}

	for _, entry := range entries {
		fd, convErr := strconv.Atoi(entry.Name())
		// Skip entries whose name is not a number, and descriptors below the
		// requested minimum.
		if convErr != nil || fd < minFd {
			continue
		}

		// Errors from syscall.CloseOnExec are deliberately ignored: the
		// cases where this might fail are basically file descriptors that
		// have already been closed (including and especially the one that
		// was created when ioutil.ReadDir did the "opendir" syscall).
		syscall.CloseOnExec(fd)
	}
	return nil
}
|
Loading…
Reference in a new issue