Update runc to ce450bcc6c135cae93ee2a99d41a308c179ff6dc
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
		
							parent
							
								
									e09b0b0c35
								
							
						
					
					
						commit
						271cac8634
					
				
					 13 changed files with 1254 additions and 5 deletions
				
			
		|  | @ -1,5 +1,5 @@ | |||
| # go-runc client for runc; master as of 01/20/2017 | ||||
| github.com/crosbymichael/go-runc afca56d262e694d9056e937a0877a39ab879aeb4 | ||||
| github.com/crosbymichael/go-runc 7b66c5da30493c5eb9c655cab67ba88071891ac5 | ||||
| # go-metrics client to prometheus; master as of 12/16/2016 | ||||
| github.com/docker/go-metrics 0f35294225552d968a13f9c5bc71a3fa44b2eb87 | ||||
| # prometheus client; latest release as of 12/16/2016 | ||||
|  | @ -31,7 +31,7 @@ github.com/nats-io/go-nats-streaming v0.3.4 | |||
| # gnatsd; latest release as of 12/16/2016 | ||||
| github.com/nats-io/gnatsd v0.9.6 | ||||
| # runc, latest release as of 12/16/2016 | ||||
| github.com/opencontainers/runc v1.0.0-rc2 | ||||
| github.com/opencontainers/runc ce450bcc6c135cae93ee2a99d41a308c179ff6dc | ||||
| # OCI runtime spec, latest release as of 12/16/2016 | ||||
| github.com/opencontainers/runtime-spec v1.0.0-rc3 | ||||
| # logrus, latest release as of 12/16/2016 | ||||
|  |  | |||
							
								
								
									
										32
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
/*
 * Fallback definitions of the CLONE_NEW* namespace flags.
 *
 * <sched.h> is included first (with _GNU_SOURCE defined), and each flag
 * below is only defined when that header did not already provide it, so
 * the values here never override the ones supplied by the C library.
 */
| #ifndef NSENTER_NAMESPACE_H | ||||
| #define NSENTER_NAMESPACE_H | ||||
| 
 | ||||
| #ifndef _GNU_SOURCE | ||||
| #	define _GNU_SOURCE | ||||
| #endif | ||||
| #include <sched.h> | ||||
| 
 | ||||
| /* All of these are taken from include/uapi/linux/sched.h */ | ||||
| #ifndef CLONE_NEWNS | ||||
| #	define CLONE_NEWNS 0x00020000 /* New mount namespace group */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWCGROUP | ||||
| #	define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWUTS | ||||
| #	define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWIPC | ||||
| #	define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWUSER | ||||
| #	define CLONE_NEWUSER 0x10000000 /* New user namespace */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWPID | ||||
| #	define CLONE_NEWPID 0x20000000 /* New pid namespace */ | ||||
| #endif | ||||
| #ifndef CLONE_NEWNET | ||||
| #	define CLONE_NEWNET 0x40000000 /* New network namespace */ | ||||
| #endif | ||||
| 
 | ||||
| #endif /* NSENTER_NAMESPACE_H */ | ||||
							
								
								
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | |||
| // +build linux,!gccgo | ||||
| 
 | ||||
| package nsenter | ||||
| 
 | ||||
| /* | ||||
| #cgo CFLAGS: -Wall | ||||
| extern void nsexec(); | ||||
| void __attribute__((constructor)) init(void) { | ||||
| 	nsexec(); | ||||
| } | ||||
| */ | ||||
| import "C" | ||||
							
								
								
									
										25
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,25 @@ | |||
| // +build linux,gccgo | ||||
| 
 | ||||
| package nsenter | ||||
| 
 | ||||
| /* | ||||
| #cgo CFLAGS: -Wall | ||||
| extern void nsexec(); | ||||
| void __attribute__((constructor)) init(void) { | ||||
| 	nsexec(); | ||||
| } | ||||
| */ | ||||
| import "C" | ||||
| 
 | ||||
| // AlwaysFalse is here to stay false | ||||
| // (and be exported so the compiler doesn't optimize out its reference) | ||||
| var AlwaysFalse bool | ||||
| 
 | ||||
// init exists only to hold a reference to the C constructor init(): the
// call sits behind AlwaysFalse, so it is not executed as long as that
// variable keeps its zero value.
| func init() { | ||||
| 	if AlwaysFalse { | ||||
| 		// by referencing this C init() in a noop test, it will ensure the compiler | ||||
| 		// links in the C function. | ||||
| 		// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 | ||||
| 		C.init() | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| // +build !linux !cgo | ||||
| 
 | ||||
| package nsenter | ||||
| 
 | ||||
| import "C" | ||||
							
								
								
									
										759
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										759
									
								
								vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,759 @@ | |||
| #define _GNU_SOURCE | ||||
| #include <endian.h> | ||||
| #include <errno.h> | ||||
| #include <fcntl.h> | ||||
| #include <grp.h> | ||||
| #include <sched.h> | ||||
| #include <setjmp.h> | ||||
| #include <signal.h> | ||||
| #include <stdarg.h> | ||||
| #include <stdbool.h> | ||||
| #include <stdint.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stdbool.h> | ||||
| #include <string.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| #include <sys/ioctl.h> | ||||
| #include <sys/prctl.h> | ||||
| #include <sys/socket.h> | ||||
| #include <sys/types.h> | ||||
| 
 | ||||
| #include <linux/limits.h> | ||||
| #include <linux/netlink.h> | ||||
| #include <linux/types.h> | ||||
| 
 | ||||
| /* Get all of the CLONE_NEW* flags. */ | ||||
| #include "namespace.h" | ||||
| 
 | ||||
/*
 * One value of this enum is written over the sync socket for every step of
 * the handshake between the stages of nsexec(). "_PLS" values are requests
 * sent by a child; the parent answers with the matching "_ACK".
 */
| /* Synchronisation values. */ | ||||
| enum sync_t { | ||||
| 	SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ | ||||
| 	SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ | ||||
| 	SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ | ||||
| 	SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ | ||||
| 	SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */ | ||||
| 
 | ||||
| 	/* XXX: This doesn't help with segfaults and other such issues. */ | ||||
| 	SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ | ||||
| }; | ||||
| 
 | ||||
| /* longjmp() arguments. */ | ||||
| #define JUMP_PARENT 0x00 | ||||
| #define JUMP_CHILD  0xA0 | ||||
| #define JUMP_INIT   0xA1 | ||||
| 
 | ||||
| /* JSON buffer. */ | ||||
| #define JSON_MAX 4096 | ||||
| 
 | ||||
/*
 * Argument block used by clone_parent(): `stack_ptr` (the end of `stack`)
 * is passed to clone() as the child's stack pointer, and the struct itself
 * is passed as the argument of child_func(), which longjmp()s to `env`
 * with `jmpval`.
 */
| /* Assume the stack grows down, so arguments should be above it. */ | ||||
| struct clone_t { | ||||
| 	/*
 | ||||
| 	 * Reserve some space for clone() to locate arguments | ||||
| 	 * and retcode in this place | ||||
| 	 */ | ||||
| 	char stack[4096] __attribute__ ((aligned(16))); | ||||
| 	char stack_ptr[0]; | ||||
| 
 | ||||
| 	/* There's two children. This is used to execute the different code. */ | ||||
| 	jmp_buf *env; | ||||
| 	int jmpval; | ||||
| }; | ||||
| 
 | ||||
/*
 * Bootstrap configuration filled in by nl_parse().
 *
 * `data` is the malloc()ed netlink payload and is released by nl_free().
 * `uidmap`, `gidmap` and `namespaces` are not separate allocations: they
 * point into `data`, with the attribute payload length stored in the
 * matching `*_len` field. `cloneflags` and `is_setgroup` are copied out of
 * the payload by value.
 */
| struct nlconfig_t { | ||||
| 	char *data; | ||||
| 	uint32_t cloneflags; | ||||
| 	char *uidmap; | ||||
| 	size_t uidmap_len; | ||||
| 	char *gidmap; | ||||
| 	size_t gidmap_len; | ||||
| 	char *namespaces; | ||||
| 	size_t namespaces_len; | ||||
| 	uint8_t is_setgroup; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * List of netlink message types sent to us as part of bootstrapping the init. | ||||
|  * These constants are defined in libcontainer/message_linux.go. | ||||
|  */ | ||||
| #define INIT_MSG		62000 | ||||
| #define CLONE_FLAGS_ATTR	27281 | ||||
| #define NS_PATHS_ATTR		27282 | ||||
| #define UIDMAP_ATTR		27283 | ||||
| #define GIDMAP_ATTR		27284 | ||||
| #define SETGROUP_ATTR		27285 | ||||
| 
 | ||||
| /*
 | ||||
|  * Use the raw syscall for versions of glibc which don't include a function for | ||||
|  * it, namely (glibc 2.12). | ||||
|  */ | ||||
| #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 | ||||
| #	define _GNU_SOURCE | ||||
| #	include "syscall.h" | ||||
| #	if !defined(SYS_setns) && defined(__NR_setns) | ||||
| #		define SYS_setns __NR_setns | ||||
| #	endif | ||||
| 
 | ||||
| #ifndef SYS_setns | ||||
| #	error "setns(2) syscall not supported by glibc version" | ||||
| #endif | ||||
| 
 | ||||
/*
 * Fallback setns() built on the raw syscall. Only compiled when the
 * enclosing glibc version check matches; returns whatever syscall() returns.
 */
| int setns(int fd, int nstype) | ||||
| { | ||||
| 	return syscall(SYS_setns, fd, nstype); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| /* XXX: This is ugly. */ | ||||
| static int syncfd = -1; | ||||
| 
 | ||||
/*
 * bail(fmt, ...): report a fatal error and terminate the process.
 *
 * Prints "nsenter: <message>: <strerror(errno)>" to stderr (via %m). If
 * syncfd has been set (>= 0), SYNC_ERR followed by the exit code is written
 * to it. The exit code is __COUNTER__ + 1, so it is non-zero and differs
 * for every place the macro is expanded. Never returns.
 */
| /* TODO(cyphar): Fix this so it correctly deals with syncT. */ | ||||
| #define bail(fmt, ...)								\ | ||||
| 	do {									\ | ||||
| 		int ret = __COUNTER__ + 1;					\ | ||||
| 		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__);	\ | ||||
| 		if (syncfd >= 0) {						\ | ||||
| 			enum sync_t s = SYNC_ERR;				\ | ||||
| 			if (write(syncfd, &s, sizeof(s)) != sizeof(s))		\ | ||||
| 				fprintf(stderr, "nsenter: failed: write(s)");	\ | ||||
| 			if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret))	\ | ||||
| 				fprintf(stderr, "nsenter: failed: write(ret)");	\ | ||||
| 		}								\ | ||||
| 		exit(ret);							\ | ||||
| 	} while(0) | ||||
| 
 | ||||
/*
 * Format a path from `pathfmt` and the variadic arguments, open that
 * (already existing) file read-write and write `data_len` bytes of `data`
 * to it with a single write(2).
 *
 * Returns 0 on success and -1 on failure. On failure errno still describes
 * the call that failed: callers inspect it (e.g. to detect ENOENT), so the
 * cleanup path must never overwrite it.
 */
static int write_file(char *data, size_t data_len, char *pathfmt, ...)
{
	int fd, len, saved_errno;
	ssize_t written;
	char path[PATH_MAX];
	va_list ap;

	va_start(ap, pathfmt);
	len = vsnprintf(path, PATH_MAX, pathfmt, ap);
	va_end(ap);
	if (len < 0)
		return -1;
	if (len >= PATH_MAX) {
		/* The path was truncated; refuse to open some other file. */
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(path, O_RDWR);
	if (fd < 0) {
		/*
		 * Nothing to close. Calling close(-1) here would replace
		 * open(2)'s errno with EBADF.
		 */
		return -1;
	}

	written = write(fd, data, data_len);
	if (written < 0 || (size_t) written != data_len) {
		/* Keep write(2)'s errno across the close(2). */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}

	close(fd);
	return 0;
}
| 
 | ||||
/*
 * Policy selector for update_setgroups(): SETGROUPS_ALLOW and
 * SETGROUPS_DENY make it write "allow" / "deny" to /proc/<pid>/setgroups,
 * while SETGROUPS_DEFAULT leaves the file untouched.
 */
| enum policy_t { | ||||
| 	SETGROUPS_DEFAULT = 0, | ||||
| 	SETGROUPS_ALLOW, | ||||
| 	SETGROUPS_DENY, | ||||
| }; | ||||
| 
 | ||||
| /* This *must* be called before we touch gid_map. */ | ||||
| static void update_setgroups(int pid, enum policy_t setgroup) | ||||
| { | ||||
| 	char *policy; | ||||
| 
 | ||||
| 	switch (setgroup) { | ||||
| 		case SETGROUPS_ALLOW: | ||||
| 			policy = "allow"; | ||||
| 			break; | ||||
| 		case SETGROUPS_DENY: | ||||
| 			policy = "deny"; | ||||
| 			break; | ||||
| 		case SETGROUPS_DEFAULT: | ||||
| 			/* Nothing to do. */ | ||||
| 			return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) { | ||||
| 		/*
 | ||||
| 		 * If the kernel is too old to support /proc/pid/setgroups, | ||||
| 		 * open(2) or write(2) will return ENOENT. This is fine. | ||||
| 		 */ | ||||
| 		if (errno != ENOENT) | ||||
| 			bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
/*
 * Write `map` (map_len bytes) to /proc/<pid>/uid_map. A NULL or empty map
 * is silently skipped; a failed write is fatal.
 */
static void update_uidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/uid_map", pid) < 0)
		bail("failed to update /proc/%d/uid_map", pid);
}
| 
 | ||||
/*
 * Write `map` (map_len bytes) to /proc/<pid>/gid_map. A NULL or empty map
 * is silently skipped; a failed write is fatal.
 */
static void update_gidmap(int pid, char *map, int map_len)
{
	int have_map = (map != NULL) && (map_len > 0);

	if (have_map && write_file(map, map_len, "/proc/%d/gid_map", pid) < 0)
		bail("failed to update /proc/%d/gid_map", pid);
}
| 
 | ||||
| /* A dummy function that just jumps to the given jumpval. */ | ||||
| static int child_func(void *arg) __attribute__ ((noinline)); | ||||
| static int child_func(void *arg) | ||||
| { | ||||
| 	struct clone_t *ca = (struct clone_t *)arg; | ||||
| 	longjmp(*ca->env, ca->jmpval); | ||||
| } | ||||
| 
 | ||||
/*
 * Create a new process with clone(CLONE_PARENT | SIGCHLD) that starts in
 * child_func(), which longjmp()s to `env` with `jmpval`. Returns the value
 * returned by clone().
 */
| static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); | ||||
| static int clone_parent(jmp_buf *env, int jmpval) | ||||
| { | ||||
| 	struct clone_t ca = { | ||||
| 		.env    = env, | ||||
| 		.jmpval = jmpval, | ||||
| 	}; | ||||
| 
 | ||||
	/* ca.stack_ptr is the end of ca.stack and is used as the child's stack pointer. */
| 	return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); | ||||
| } | ||||
| 
 | ||||
/*
 * Gets the init pipe fd from the environment, which is used to read the
 * bootstrap data and tell the parent what the new pid is after we finish
 * setting up the environment.
 *
 * Returns -1 when _LIBCONTAINER_INITPIPE is unset or empty. A value that is
 * not a plain decimal number, is negative, or does not fit in an int is a
 * fatal error: previously such values were silently truncated, and an
 * overflowing value could collapse to -1 and be mistaken for "no init pipe".
 */
static int initpipe(void)
{
	long pipenum;
	char *initpipe, *endptr;

	initpipe = getenv("_LIBCONTAINER_INITPIPE");
	if (initpipe == NULL || *initpipe == '\0')
		return -1;

	/* strtol() reports overflow only through errno, so clear it first. */
	errno = 0;
	pipenum = strtol(initpipe, &endptr, 10);
	if (*endptr != '\0') {
		errno = EINVAL;
		bail("unable to parse _LIBCONTAINER_INITPIPE");
	}

	/* A file descriptor must be a non-negative value that fits in an int. */
	if (errno == ERANGE || pipenum < 0 || pipenum != (long) (int) pipenum) {
		errno = ERANGE;
		bail("unable to parse _LIBCONTAINER_INITPIPE");
	}

	return (int) pipenum;
}
| 
 | ||||
/*
 * Map a namespace name ("cgroup", "ipc", "mnt", "net", "pid", "user",
 * "uts") to its CLONE_NEW* flag. Unrecognised names map to 0.
 */
static int nsflag(char *name)
{
	static const struct {
		const char *name;
		int flag;
	} known[] = {
		{ "cgroup", CLONE_NEWCGROUP },
		{ "ipc",    CLONE_NEWIPC },
		{ "mnt",    CLONE_NEWNS },
		{ "net",    CLONE_NEWNET },
		{ "pid",    CLONE_NEWPID },
		{ "user",   CLONE_NEWUSER },
		{ "uts",    CLONE_NEWUTS },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(name, known[i].name) == 0)
			return known[i].flag;
	}

	/* If we don't recognise a name, fallback to 0. */
	return 0;
}
| 
 | ||||
/*
 * Read a host-endian 32-bit unsigned integer from the first four bytes of
 * `buf`. memcpy() is used instead of dereferencing a casted pointer so the
 * read is valid for any alignment of `buf` and does not break aliasing rules.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
| 
 | ||||
/* Return the first byte of `buf` as an unsigned 8-bit value. */
static uint8_t readint8(char *buf)
{
	const unsigned char *bytes = (const unsigned char *) buf;

	return (uint8_t) bytes[0];
}
| 
 | ||||
| static void nl_parse(int fd, struct nlconfig_t *config) | ||||
| { | ||||
| 	size_t len, size; | ||||
| 	struct nlmsghdr hdr; | ||||
| 	char *data, *current; | ||||
| 
 | ||||
| 	/* Retrieve the netlink header. */ | ||||
| 	len = read(fd, &hdr, NLMSG_HDRLEN); | ||||
| 	if (len != NLMSG_HDRLEN) | ||||
| 		bail("invalid netlink header length %lu", len); | ||||
| 
 | ||||
| 	if (hdr.nlmsg_type == NLMSG_ERROR) | ||||
| 		bail("failed to read netlink message"); | ||||
| 
 | ||||
| 	if (hdr.nlmsg_type != INIT_MSG) | ||||
| 		bail("unexpected msg type %d", hdr.nlmsg_type); | ||||
| 
 | ||||
| 	/* Retrieve data. */ | ||||
| 	size = NLMSG_PAYLOAD(&hdr, 0); | ||||
| 	current = data = malloc(size); | ||||
| 	if (!data) | ||||
| 		bail("failed to allocate %zu bytes of memory for nl_payload", size); | ||||
| 
 | ||||
| 	len = read(fd, data, size); | ||||
| 	if (len != size) | ||||
| 		bail("failed to read netlink payload, %lu != %lu", len, size); | ||||
| 
 | ||||
| 	/* Parse the netlink payload. */ | ||||
| 	config->data = data; | ||||
| 	while (current < data + size) { | ||||
| 		struct nlattr *nlattr = (struct nlattr *)current; | ||||
| 		size_t payload_len = nlattr->nla_len - NLA_HDRLEN; | ||||
| 
 | ||||
| 		/* Advance to payload. */ | ||||
| 		current += NLA_HDRLEN; | ||||
| 
 | ||||
| 		/* Handle payload. */ | ||||
| 		switch (nlattr->nla_type) { | ||||
| 		case CLONE_FLAGS_ATTR: | ||||
| 			config->cloneflags = readint32(current); | ||||
| 			break; | ||||
| 		case NS_PATHS_ATTR: | ||||
| 			config->namespaces = current; | ||||
| 			config->namespaces_len = payload_len; | ||||
| 			break; | ||||
| 		case UIDMAP_ATTR: | ||||
| 			config->uidmap = current; | ||||
| 			config->uidmap_len = payload_len; | ||||
| 			break; | ||||
| 		case GIDMAP_ATTR: | ||||
| 			config->gidmap = current; | ||||
| 			config->gidmap_len = payload_len; | ||||
| 			break; | ||||
| 		case SETGROUP_ATTR: | ||||
| 			config->is_setgroup = readint8(current); | ||||
| 			break; | ||||
| 		default: | ||||
| 			bail("unknown netlink message type %d", nlattr->nla_type); | ||||
| 		} | ||||
| 
 | ||||
| 		current += NLA_ALIGN(payload_len); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void nl_free(struct nlconfig_t *config) | ||||
| { | ||||
| 	free(config->data); | ||||
| } | ||||
| 
 | ||||
/*
 * Join every namespace listed in `nslist`, a comma-separated list of
 * "type:path" entries. The string is modified in place while it is split.
 *
 * All paths are opened before the first setns(2) call, and the namespaces
 * are then joined in the order they were given. Any failure is fatal.
 */
void join_namespaces(char *nslist)
{
	struct namespace_t {
		int fd;
		int ns;
		char type[PATH_MAX];
		char path[PATH_MAX];
	};
	struct namespace_t *table = NULL;
	char *cursor = NULL;
	char *entry;
	int count = 0, idx;

	entry = strtok_r(nslist, ",", &cursor);
	if (entry == NULL || entry[0] == '\0' || nslist[0] == '\0')
		bail("ns paths are empty");

	/*
	 * Open all the file descriptors up front: once the mnt namespace has
	 * been joined the paths might no longer be reachable.
	 */
	for (; entry != NULL; entry = strtok_r(NULL, ",", &cursor)) {
		struct namespace_t *slot;
		char *sep, *nspath;
		int nsfd;

		/* Grow the array by one element. */
		count++;
		table = realloc(table, count * sizeof(struct namespace_t));
		if (table == NULL)
			bail("failed to reallocate namespace array");
		slot = &table[count - 1];

		/* Split 'ns:path' at the first colon. */
		sep = strchr(entry, ':');
		if (sep == NULL)
			bail("failed to parse %s", entry);
		*sep = '\0';
		nspath = sep + 1;

		nsfd = open(nspath, O_RDONLY);
		if (nsfd < 0)
			bail("failed to open %s", nspath);

		slot->fd = nsfd;
		slot->ns = nsflag(entry);
		strncpy(slot->path, nspath, PATH_MAX);
	}

	/*
	 * Ordering matters (the user namespace has to come first), but it is
	 * the caller that guarantees it: simply follow the order given.
	 */
	for (idx = 0; idx < count; idx++) {
		struct namespace_t *cur = &table[idx];

		if (setns(cur->fd, cur->ns) < 0)
			bail("failed to setns to %s", cur->path);

		close(cur->fd);
	}

	free(table);
}
| 
 | ||||
| void nsexec(void) | ||||
| { | ||||
| 	int pipenum; | ||||
| 	jmp_buf env; | ||||
| 	int syncpipe[2]; | ||||
| 	struct nlconfig_t config = {0}; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we don't have an init pipe, just return to the go routine. | ||||
| 	 * We'll only get an init pipe for start or exec. | ||||
| 	 */ | ||||
| 	pipenum = initpipe(); | ||||
| 	if (pipenum == -1) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* make the process non-dumpable */ | ||||
| 	if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) { | ||||
| 		bail("failed to set process as non-dumpable"); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Parse all of the netlink configuration. */ | ||||
| 	nl_parse(pipenum, &config); | ||||
| 
 | ||||
| 	/* Pipe so we can tell the child when we've finished setting up. */ | ||||
| 	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0) | ||||
| 		bail("failed to setup sync pipe between parent and child"); | ||||
| 
 | ||||
| 	/* TODO: Currently we aren't dealing with child deaths properly. */ | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Okay, so this is quite annoying. | ||||
| 	 * | ||||
| 	 * In order for this unsharing code to be more extensible we need to split | ||||
| 	 * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case | ||||
| 	 * would be if we did clone(CLONE_NEWUSER) and the other namespaces | ||||
| 	 * separately, but because of SELinux issues we cannot really do that. But | ||||
| 	 * we cannot just dump the namespace flags into clone(...) because several | ||||
| 	 * usecases (such as rootless containers) require more granularity around | ||||
| 	 * the namespace setup. In addition, some older kernels had issues where | ||||
| 	 * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot | ||||
| 	 * handle this while also dealing with SELinux so we choose SELinux support | ||||
| 	 * over broken kernel support). | ||||
| 	 * | ||||
| 	 * However, if we unshare(2) the user namespace *before* we clone(2), then | ||||
| 	 * all hell breaks loose. | ||||
| 	 * | ||||
| 	 * The parent no longer has permissions to do many things (unshare(2) drops | ||||
| 	 * all capabilities in your old namespace), and the container cannot be set | ||||
| 	 * up to have more than one {uid,gid} mapping. This is obviously less than | ||||
| 	 * ideal. In order to fix this, we have to first clone(2) and then unshare. | ||||
| 	 * | ||||
| 	 * Unfortunately, it's not as simple as that. We have to fork to enter the | ||||
| 	 * PID namespace (the PID namespace only applies to children). Since we'll | ||||
| 	 * have to double-fork, this clone_parent() call won't be able to get the | ||||
| 	 * PID of the _actual_ init process (without doing more synchronisation than | ||||
| 	 * I can deal with at the moment). So we'll just get the parent to send it | ||||
| 	 * for us, the only job of this process is to update | ||||
| 	 * /proc/pid/{setgroups,uid_map,gid_map}. | ||||
| 	 * | ||||
| 	 * And as a result of the above, we also need to setns(2) in the first child | ||||
| 	 * because if we join a PID namespace in the topmost parent then our child | ||||
| 	 * will be in that namespace (and it will not be able to give us a PID value | ||||
| 	 * that makes sense without resorting to sending things with cmsg). | ||||
| 	 * | ||||
| 	 * This also deals with an older issue caused by dumping cloneflags into | ||||
| 	 * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so | ||||
| 	 * we have to unshare(2) before clone(2) in order to do this. This was fixed | ||||
| 	 * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was | ||||
| 	 * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're | ||||
| 	 * aware, the last mainline kernel which had this bug was Linux 3.12. | ||||
| 	 * However, we cannot comment on which kernels the broken patch was | ||||
| 	 * backported to. | ||||
| 	 * | ||||
| 	 * -- Aleksa "what has my life come to?" Sarai | ||||
| 	 */ | ||||
| 
 | ||||
| 	switch (setjmp(env)) { | ||||
| 	/*
 | ||||
| 	 * Stage 0: We're in the parent. Our job is just to create a new child | ||||
| 	 *          (stage 1: JUMP_CHILD) process and write its uid_map and | ||||
| 	 *          gid_map. That process will go on to create a new process, then | ||||
| 	 *          it will send us its PID which we will send to the bootstrap | ||||
| 	 *          process. | ||||
| 	 */ | ||||
| 	case JUMP_PARENT: { | ||||
| 			int len, ready = 0; | ||||
| 			pid_t child; | ||||
| 			char buf[JSON_MAX]; | ||||
| 
 | ||||
| 			/* For debugging. */ | ||||
| 			prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0); | ||||
| 
 | ||||
| 			/* Start the process of getting a container. */ | ||||
| 			child = clone_parent(&env, JUMP_CHILD); | ||||
| 			if (child < 0) | ||||
| 				bail("unable to fork: child_func"); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * State machine for synchronisation with the children. | ||||
| 			 * | ||||
| 			 * Father only return when both child and grandchild are | ||||
| 			 * ready, so we can receive all possible error codes | ||||
| 			 * generated by children. | ||||
| 			 */ | ||||
| 			while (ready < 2) { | ||||
| 				enum sync_t s; | ||||
| 
 | ||||
| 				/* This doesn't need to be global, we're in the parent. */ | ||||
| 				int syncfd = syncpipe[1]; | ||||
| 
 | ||||
| 				if (read(syncfd, &s, sizeof(s)) != sizeof(s)) | ||||
| 					bail("failed to sync with child: next state"); | ||||
| 
 | ||||
| 				switch (s) { | ||||
| 				case SYNC_ERR: { | ||||
| 						/* We have to mirror the error code of the child. */ | ||||
| 						int ret; | ||||
| 
 | ||||
| 						if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) | ||||
| 							bail("failed to sync with child: read(error code)"); | ||||
| 
 | ||||
| 						exit(ret); | ||||
| 					} | ||||
| 					break; | ||||
| 				case SYNC_USERMAP_PLS: | ||||
| 					/* Enable setgroups(2) if we've been asked to. */ | ||||
| 					if (config.is_setgroup) | ||||
| 						update_setgroups(child, SETGROUPS_ALLOW); | ||||
| 
 | ||||
| 					/* Set up mappings. */ | ||||
| 					update_uidmap(child, config.uidmap, config.uidmap_len); | ||||
| 					update_gidmap(child, config.gidmap, config.gidmap_len); | ||||
| 
 | ||||
| 					s = SYNC_USERMAP_ACK; | ||||
| 					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { | ||||
| 						kill(child, SIGKILL); | ||||
| 						bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); | ||||
| 					} | ||||
| 					break; | ||||
| 				case SYNC_USERMAP_ACK: | ||||
| 					/* We should _never_ receive acks. */ | ||||
| 					kill(child, SIGKILL); | ||||
| 					bail("failed to sync with child: unexpected SYNC_USERMAP_ACK"); | ||||
| 					break; | ||||
| 				case SYNC_RECVPID_PLS: { | ||||
| 						pid_t old = child; | ||||
| 
 | ||||
| 						/* Get the init_func pid. */ | ||||
| 						if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { | ||||
| 							kill(old, SIGKILL); | ||||
| 							bail("failed to sync with child: read(childpid)"); | ||||
| 						} | ||||
| 
 | ||||
| 						/* Send ACK. */ | ||||
| 						s = SYNC_RECVPID_ACK; | ||||
| 						if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { | ||||
| 							kill(old, SIGKILL); | ||||
| 							kill(child, SIGKILL); | ||||
| 							bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); | ||||
| 						} | ||||
| 					} | ||||
| 
 | ||||
| 					ready++; | ||||
| 					break; | ||||
| 				case SYNC_RECVPID_ACK: | ||||
| 					/* We should _never_ receive acks. */ | ||||
| 					kill(child, SIGKILL); | ||||
| 					bail("failed to sync with child: unexpected SYNC_RECVPID_ACK"); | ||||
| 					break; | ||||
| 				case SYNC_CHILD_READY: | ||||
| 					ready++; | ||||
| 					break; | ||||
| 				default: | ||||
| 					bail("unexpected sync value"); | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
| 			/* Send the init_func pid back to our parent. */ | ||||
| 			len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child); | ||||
| 			if (len < 0) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("unable to generate JSON for child pid"); | ||||
| 			} | ||||
| 			if (write(pipenum, buf, len) != len) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("unable to send child pid to bootstrapper"); | ||||
| 			} | ||||
| 
 | ||||
| 			exit(0); | ||||
| 		} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Stage 1: We're in the first child process. Our job is to join any | ||||
| 	 *          provided namespaces in the netlink payload and unshare all | ||||
| 	 *          of the requested namespaces. If we've been asked to | ||||
| 	 *          CLONE_NEWUSER, we will ask our parent (stage 0) to set up | ||||
| 	 *          our user mappings for us. Then, we create a new child | ||||
| 	 *          (stage 2: JUMP_INIT) for PID namespace. We then send the | ||||
| 	 *          child's PID to our parent (stage 0). | ||||
| 	 */ | ||||
| 	case JUMP_CHILD: { | ||||
| 			pid_t child; | ||||
| 			enum sync_t s; | ||||
| 
 | ||||
| 			/* We're in a child and thus need to tell the parent if we die. */ | ||||
| 			syncfd = syncpipe[0]; | ||||
| 
 | ||||
| 			/* For debugging. */ | ||||
| 			prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * We need to setns first. We cannot do this earlier (in stage 0) | ||||
| 			 * because of the fact that we forked to get here (the PID of | ||||
| 			 * [stage 2: JUMP_INIT]) would be meaningless). We could send it | ||||
| 			 * using cmsg(3) but that's just annoying. | ||||
| 			 */ | ||||
| 			if (config.namespaces) | ||||
| 				join_namespaces(config.namespaces); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Unshare all of the namespaces. Now, it should be noted that this | ||||
| 			 * ordering might break in the future (especially with rootless | ||||
| 			 * containers). But for now, it's not possible to split this into | ||||
| 			 * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. | ||||
| 			 * | ||||
| 			 * Note that we don't merge this with clone() because there were | ||||
| 			 * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) | ||||
| 			 * was broken, so we'll just do it the long way anyway. | ||||
| 			 */ | ||||
| 			if (unshare(config.cloneflags) < 0) | ||||
| 				bail("failed to unshare namespaces"); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Deal with user namespaces first. They are quite special, as they | ||||
| 			 * affect our ability to unshare other namespaces and are used as | ||||
| 			 * context for privilege checks. | ||||
| 			 */ | ||||
| 			if (config.cloneflags & CLONE_NEWUSER) { | ||||
| 				/*
 | ||||
| 				 * We don't have the privileges to do any mapping here (see the | ||||
| 				 * clone_parent rant). So signal our parent to hook us up. | ||||
| 				 */ | ||||
| 
 | ||||
| 				s = SYNC_USERMAP_PLS; | ||||
| 				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) | ||||
| 					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); | ||||
| 
 | ||||
| 				/* ... wait for mapping ... */ | ||||
| 
 | ||||
| 				if (read(syncfd, &s, sizeof(s)) != sizeof(s)) | ||||
| 					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); | ||||
| 				if (s != SYNC_USERMAP_ACK) | ||||
| 					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * TODO: What about non-namespace clone flags that we're dropping here? | ||||
| 			 * | ||||
| 			 * We fork again because of PID namespace, setns(2) or unshare(2) don't | ||||
| 			 * change the PID namespace of the calling process, because doing so | ||||
| 			 * would change the caller's idea of its own PID (as reported by getpid()), | ||||
| 			 * which would break many applications and libraries, so we must fork | ||||
| 			 * to actually enter the new PID namespace. | ||||
| 			 */ | ||||
| 			child = clone_parent(&env, JUMP_INIT); | ||||
| 			if (child < 0) | ||||
| 				bail("unable to fork: init_func"); | ||||
| 
 | ||||
| 			/* Send the child to our parent, which knows what it's doing. */ | ||||
| 			s = SYNC_RECVPID_PLS; | ||||
| 			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); | ||||
| 			} | ||||
| 			if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("failed to sync with parent: write(childpid)"); | ||||
| 			} | ||||
| 
 | ||||
| 			/* ... wait for parent to get the pid ... */ | ||||
| 
 | ||||
| 			if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); | ||||
| 			} | ||||
| 			if (s != SYNC_RECVPID_ACK) { | ||||
| 				kill(child, SIGKILL); | ||||
| 				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); | ||||
| 			} | ||||
| 
 | ||||
| 			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ | ||||
| 			exit(0); | ||||
| 		} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Stage 2: We're the final child process, and the only process that will | ||||
| 	 *          actually return to the Go runtime. Our job is to just do the | ||||
| 	 *          final cleanup steps and then return to the Go runtime to allow | ||||
| 	 *          init_linux.go to run. | ||||
| 	 */ | ||||
| 	case JUMP_INIT: { | ||||
| 			/*
 | ||||
| 			 * We're inside the child now, having jumped from the | ||||
| 			 * start_child() code after forking in the parent. | ||||
| 			 */ | ||||
| 			enum sync_t s; | ||||
| 
 | ||||
| 			/* We're in a child and thus need to tell the parent if we die. */ | ||||
| 			syncfd = syncpipe[0]; | ||||
| 
 | ||||
| 			/* For debugging. */ | ||||
| 			prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0); | ||||
| 
 | ||||
| 			if (setsid() < 0) | ||||
| 				bail("setsid failed"); | ||||
| 
 | ||||
| 			if (setuid(0) < 0) | ||||
| 				bail("setuid failed"); | ||||
| 
 | ||||
| 			if (setgid(0) < 0) | ||||
| 				bail("setgid failed"); | ||||
| 
 | ||||
| 			if (setgroups(0, NULL) < 0) | ||||
| 				bail("setgroups failed"); | ||||
| 
 | ||||
| 			s = SYNC_CHILD_READY; | ||||
| 			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) | ||||
| 				bail("failed to sync with patent: write(SYNC_CHILD_READY)"); | ||||
| 
 | ||||
| 			/* Close sync pipes. */ | ||||
| 			close(syncpipe[0]); | ||||
| 			close(syncpipe[1]); | ||||
| 
 | ||||
| 			/* Free netlink data. */ | ||||
| 			nl_free(&config); | ||||
| 
 | ||||
| 			/* Finish executing, let the Go runtime take over. */ | ||||
| 			return; | ||||
| 		} | ||||
| 	default: | ||||
| 		bail("unexpected jump value"); | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Should never be reached. */ | ||||
| 	bail("should never be reached"); | ||||
| } | ||||
							
								
								
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
									
										
									
										generated
									
									
										vendored
									
									
								
							|  | @ -14,8 +14,10 @@ func GetProcessStartTime(pid int) (string, error) { | |||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return parseStartTime(string(data)) | ||||
| } | ||||
| 
 | ||||
| 	parts := strings.Split(string(data), " ") | ||||
| func parseStartTime(stat string) (string, error) { | ||||
| 	// the starttime is located at pos 22 | ||||
| 	// from the man page | ||||
| 	// | ||||
|  | @ -23,5 +25,19 @@ func GetProcessStartTime(pid int) (string, error) { | |||
| 	// (22)  The  time the process started after system boot.  In kernels before Linux 2.6, this | ||||
| 	// value was expressed in jiffies.  Since Linux 2.6, the value is expressed in  clock  ticks | ||||
| 	// (divide by sysconf(_SC_CLK_TCK)). | ||||
| 	return parts[22-1], nil // starts at 1 | ||||
| 	// | ||||
| 	// NOTE: | ||||
| 	// pos 2 could contain space and is inside `(` and `)`: | ||||
| 	// (2) comm  %s | ||||
| 	// The filename of the executable, in parentheses. | ||||
| 	// This is visible whether or not the executable is | ||||
| 	// swapped out. | ||||
| 	// | ||||
| 	// the following is an example: | ||||
| 	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 | ||||
| 
 | ||||
| 	// get parts after last `)`: | ||||
| 	s := strings.Split(stat, ")") | ||||
| 	parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") | ||||
| 	return parts[22-3], nil // starts at 3 (after the filename pos `2`) | ||||
| } | ||||
|  |  | |||
							
								
								
									
										2
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user.go
									
										
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user.go
									
										
									
										generated
									
									
										vendored
									
									
								
							|  | @ -343,7 +343,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( | |||
| 			if len(groups) > 0 { | ||||
| 				// First match wins, even if there's more than one matching entry. | ||||
| 				user.Gid = groups[0].Gid | ||||
| 			} else if groupArg != "" { | ||||
| 			} else { | ||||
| 				// If we can't find a group with the given name, the only other valid | ||||
| 				// option is if it's a numeric group name with no associated entry in group. | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										148
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										148
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,148 @@ | |||
| /*
 | ||||
|  * Copyright 2016 SUSE LLC | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| #include <errno.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <sys/socket.h> | ||||
| #include <sys/types.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| #include "cmsg.h" | ||||
| 
 | ||||
/*
 * Reports a failure and bails out of the calling function: prints
 * "nsenter: <message>: <errno text>" to stderr (the errno text comes from
 * the %m conversion, so it reflects errno at the time of the call), then
 * overwrites errno with ECOMM and jumps to the caller's "err" label.
 *
 * The calling function must therefore define an "err:" label.
 */
#define error(fmt, ...)							\
	do {								\
		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
		errno = ECOMM;						\
		goto err;						\
	} while (0)
| 
 | ||||
| /*
 | ||||
|  * Sends a file descriptor along the sockfd provided. Returns the return | ||||
|  * value of sendmsg(2). Any synchronisation and preparation of state | ||||
|  * should be done external to this (we expect the other side to be in | ||||
|  * recvfd() in the code). | ||||
|  */ | ||||
| ssize_t sendfd(int sockfd, struct file_t file) | ||||
| { | ||||
| 	struct msghdr msg = {0}; | ||||
| 	struct iovec iov[1] = {0}; | ||||
| 	struct cmsghdr *cmsg; | ||||
| 	int *fdptr; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	union { | ||||
| 		char buf[CMSG_SPACE(sizeof(file.fd))]; | ||||
| 		struct cmsghdr align; | ||||
| 	} u; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We need to send some other data along with the ancillary data, | ||||
| 	 * otherwise the other side won't recieve any data. This is very | ||||
| 	 * well-hidden in the documentation (and only applies to | ||||
| 	 * SOCK_STREAM). See the bottom part of unix(7). | ||||
| 	 */ | ||||
| 	iov[0].iov_base = file.name; | ||||
| 	iov[0].iov_len = strlen(file.name) + 1; | ||||
| 
 | ||||
| 	msg.msg_name = NULL; | ||||
| 	msg.msg_namelen = 0; | ||||
| 	msg.msg_iov = iov; | ||||
| 	msg.msg_iovlen = 1; | ||||
| 	msg.msg_control = u.buf; | ||||
| 	msg.msg_controllen = sizeof(u.buf); | ||||
| 
 | ||||
| 	cmsg = CMSG_FIRSTHDR(&msg); | ||||
| 	cmsg->cmsg_level = SOL_SOCKET; | ||||
| 	cmsg->cmsg_type = SCM_RIGHTS; | ||||
| 	cmsg->cmsg_len = CMSG_LEN(sizeof(int)); | ||||
| 
 | ||||
| 	fdptr = (int *) CMSG_DATA(cmsg); | ||||
| 	memcpy(fdptr, &file.fd, sizeof(int)); | ||||
| 
 | ||||
| 	return sendmsg(sockfd, &msg, 0); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Receives a file descriptor from the sockfd provided. Returns the file | ||||
|  * descriptor as sent from sendfd(). It will return the file descriptor | ||||
|  * or die (literally) trying. Any synchronisation and preparation of | ||||
|  * state should be done external to this (we expect the other side to be | ||||
|  * in sendfd() in the code). | ||||
|  */ | ||||
| struct file_t recvfd(int sockfd) | ||||
| { | ||||
| 	struct msghdr msg = {0}; | ||||
| 	struct iovec iov[1] = {0}; | ||||
| 	struct cmsghdr *cmsg; | ||||
| 	struct file_t file = {0}; | ||||
| 	int *fdptr; | ||||
| 	int olderrno; | ||||
| 
 | ||||
| 	union { | ||||
| 		char buf[CMSG_SPACE(sizeof(file.fd))]; | ||||
| 		struct cmsghdr align; | ||||
| 	} u; | ||||
| 
 | ||||
| 	/* Allocate a buffer. */ | ||||
| 	/* TODO: Make this dynamic with MSG_PEEK. */ | ||||
| 	file.name = malloc(TAG_BUFFER); | ||||
| 	if (!file.name) | ||||
| 		error("recvfd: failed to allocate file.tag buffer\n"); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We need to "recieve" the non-ancillary data even though we don't | ||||
| 	 * plan to use it at all. Otherwise, things won't work as expected. | ||||
| 	 * See unix(7) and other well-hidden documentation. | ||||
| 	 */ | ||||
| 	iov[0].iov_base = file.name; | ||||
| 	iov[0].iov_len = TAG_BUFFER; | ||||
| 
 | ||||
| 	msg.msg_name = NULL; | ||||
| 	msg.msg_namelen = 0; | ||||
| 	msg.msg_iov = iov; | ||||
| 	msg.msg_iovlen = 1; | ||||
| 	msg.msg_control = u.buf; | ||||
| 	msg.msg_controllen = sizeof(u.buf); | ||||
| 
 | ||||
| 	ssize_t ret = recvmsg(sockfd, &msg, 0); | ||||
| 	if (ret < 0) | ||||
| 		goto err; | ||||
| 
 | ||||
| 	cmsg = CMSG_FIRSTHDR(&msg); | ||||
| 	if (!cmsg) | ||||
| 		error("recvfd: got NULL from CMSG_FIRSTHDR"); | ||||
| 	if (cmsg->cmsg_level != SOL_SOCKET) | ||||
| 		error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level); | ||||
| 	if (cmsg->cmsg_type != SCM_RIGHTS) | ||||
| 		error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type); | ||||
| 	if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) | ||||
| 		error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len); | ||||
| 
 | ||||
| 	fdptr = (int *) CMSG_DATA(cmsg); | ||||
| 	if (!fdptr || *fdptr < 0) | ||||
| 		error("recvfd: recieved invalid pointer"); | ||||
| 
 | ||||
| 	file.fd = *fdptr; | ||||
| 	return file; | ||||
| 
 | ||||
| err: | ||||
| 	olderrno = errno; | ||||
| 	free(file.name); | ||||
| 	errno = olderrno; | ||||
| 	return (struct file_t){0}; | ||||
| } | ||||
							
								
								
									
										57
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,57 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package utils | ||||
| 
 | ||||
| /* | ||||
|  * Copyright 2016 SUSE LLC | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| /* | ||||
| #include <errno.h> | ||||
| #include <stdlib.h> | ||||
| #include "cmsg.h" | ||||
| */ | ||||
| import "C" | ||||
| 
 | ||||
| import ( | ||||
| 	"os" | ||||
| 	"unsafe" | ||||
| ) | ||||
| 
 | ||||
| // RecvFd waits for a file descriptor to be sent over the given AF_UNIX | ||||
| // socket. The file name of the remote file descriptor will be recreated | ||||
| // locally (it is sent as non-auxiliary data in the same payload). | ||||
| func RecvFd(socket *os.File) (*os.File, error) { | ||||
| 	file, err := C.recvfd(C.int(socket.Fd())) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer C.free(unsafe.Pointer(file.name)) | ||||
| 	return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil | ||||
| } | ||||
| 
 | ||||
| // SendFd sends a file descriptor over the given AF_UNIX socket. In | ||||
| // addition, the file.Name() of the given file will also be sent as | ||||
| // non-auxiliary data in the same payload (allowing to send contextual | ||||
| // information for a file descriptor). | ||||
| func SendFd(socket, file *os.File) error { | ||||
| 	var cfile C.struct_file_t | ||||
| 	cfile.fd = C.int(file.Fd()) | ||||
| 	cfile.name = C.CString(file.Name()) | ||||
| 	defer C.free(unsafe.Pointer(cfile.name)) | ||||
| 
 | ||||
| 	_, err := C.sendfd(C.int(socket.Fd()), cfile) | ||||
| 	return err | ||||
| } | ||||
							
								
								
									
										36
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /*
 | ||||
|  * Copyright 2016 SUSE LLC | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #if !defined(CMSG_H) | ||||
| #define CMSG_H | ||||
| 
 | ||||
| #include <sys/types.h> | ||||
| 
 | ||||
| /* TODO: Implement this properly with MSG_PEEK. */ | ||||
| #define TAG_BUFFER 4096 | ||||
| 
 | ||||
/*
 * This mirrors Go's (*os.File): a file descriptor together with the name
 * associated with it.
 */
struct file_t {
	char *name; /* NUL-terminated name sent alongside the descriptor */
	int fd;     /* the file descriptor itself */
};

/*
 * recvfd() receives a descriptor and its name from sockfd; sendfd() sends
 * file.fd and file.name over sockfd. See cmsg.c for the details of the
 * wire format and the error conventions.
 */
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
| 
 | ||||
| #endif /* !defined(CMSG_H) */ | ||||
							
								
								
									
										126
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,126 @@ | |||
| package utils | ||||
| 
 | ||||
| import ( | ||||
| 	"crypto/rand" | ||||
| 	"encoding/hex" | ||||
| 	"encoding/json" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 	"unsafe" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	exitSignalOffset = 128 | ||||
| ) | ||||
| 
 | ||||
// GenerateRandomName returns prefix followed by a random hexadecimal
// suffix of at most size characters.
//
// The suffix is taken from the hex encoding of 32 cryptographically random
// bytes, so it can never be longer than 64 characters: a larger size is
// clamped to 64, and a negative size is treated as 0 (yielding just the
// prefix) instead of panicking. An error is returned only if the system
// random source cannot be read.
func GenerateRandomName(prefix string, size int) (string, error) {
	id := make([]byte, 32)
	if _, err := io.ReadFull(rand.Reader, id); err != nil {
		return "", err
	}

	suffix := hex.EncodeToString(id)
	switch {
	case size < 0:
		// Slicing with a negative bound would panic.
		size = 0
	case size > len(suffix):
		size = len(suffix)
	}
	return prefix + suffix[:size], nil
}
| 
 | ||||
// ResolveRootfs turns uncleanRootfs into an absolute path and then
// resolves every symbolic link in it, returning the resulting path. An
// error is returned if the path cannot be made absolute or if symlink
// resolution fails (for example because the path does not exist).
func ResolveRootfs(uncleanRootfs string) (string, error) {
	absolute, absErr := filepath.Abs(uncleanRootfs)
	if absErr != nil {
		return "", absErr
	}

	resolved, evalErr := filepath.EvalSymlinks(absolute)
	return resolved, evalErr
}
| 
 | ||||
| // ExitStatus returns the correct exit status for a process based on if it | ||||
| // was signaled or exited cleanly | ||||
| func ExitStatus(status syscall.WaitStatus) int { | ||||
| 	if status.Signaled() { | ||||
| 		return exitSignalOffset + int(status.Signal()) | ||||
| 	} | ||||
| 	return status.ExitStatus() | ||||
| } | ||||
| 
 | ||||
// WriteJSON marshals v with encoding/json and writes the resulting bytes
// to w in a single Write call. Nothing is written if marshaling fails. The
// marshaling error or the write error, whichever occurs first, is
// returned.
func WriteJSON(w io.Writer, v interface{}) error {
	encoded, err := json.Marshal(v)
	if err != nil {
		return err
	}
	if _, err := w.Write(encoded); err != nil {
		return err
	}
	return nil
}
| 
 | ||||
// CleanPath returns a lexically sanitised version of path that is safe to
// append to another path with filepath.Join.
//
// An empty path stays empty and an absolute path is simply cleaned. A
// relative path is cleaned as if it were rooted at "/", so that leading
// ".." components cannot climb out of whatever it is later joined onto,
// and is then made relative again. Only the text of the path is examined:
// symlinks are not resolved, so a path containing symlinks is not made
// safe by this function.
func CleanPath(path string) string {
	if path == "" {
		return ""
	}

	// Normalise first; this already collapses things like "/../../../".
	cleaned := filepath.Clean(path)
	if filepath.IsAbs(cleaned) {
		// Absolute paths stay absolute. The result is already in clean
		// form and cleaning is idempotent.
		return cleaned
	}

	// Anchor the relative path at the root so "../../x" collapses to "/x",
	// then strip the anchor again.
	root := string(os.PathSeparator)
	anchored := filepath.Clean(root + cleaned)
	// Rel cannot fail here: every rooted path is relative to the root.
	relative, _ := filepath.Rel(root, anchored)

	return filepath.Clean(relative)
}
| 
 | ||||
// SearchLabels looks through labels, a list of "key=value" strings, and
// returns the value of the first entry whose key equals query. Entries
// without a '=' are skipped; only the first '=' separates key from value.
// The empty string is returned when no entry matches.
func SearchLabels(labels []string, query string) string {
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			// Not a key=value pair.
			continue
		}
		if label[:sep] == query {
			return label[sep+1:]
		}
	}
	return ""
}
| 
 | ||||
// Annotations splits a list of "key=value" labels into the bundle path and
// the remaining user-defined annotations. The "bundle" label is reported
// separately and kept out of the annotation map (if it occurs more than
// once the last occurrence wins). Labels without a '=' are ignored. The
// returned map is never nil.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
	userAnnotations = map[string]string{}
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			// Not a key=value pair.
			continue
		}
		key, value := label[:sep], label[sep+1:]
		switch key {
		case "bundle":
			bundle = value
		default:
			userAnnotations[key] = value
		}
	}
	return bundle, userAnnotations
}
| 
 | ||||
// GetIntSize returns the size in bytes of the Go int type on the platform
// the program was compiled for.
func GetIntSize() int {
	var probe int
	return int(unsafe.Sizeof(probe))
}
							
								
								
									
										33
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| // +build !windows | ||||
| 
 | ||||
| package utils | ||||
| 
 | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"strconv" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
// CloseExecFrom marks every file descriptor of the current process that is
// numbered minFd or higher as close-on-exec. The open descriptors are
// discovered by listing /proc/self/fd; an error is returned only if that
// listing fails.
func CloseExecFrom(minFd int) error {
	entries, err := ioutil.ReadDir("/proc/self/fd")
	if err != nil {
		return err
	}

	for _, entry := range entries {
		fd, convErr := strconv.Atoi(entry.Name())
		// Skip names that are not numbers as well as descriptors below
		// the requested minimum.
		if convErr != nil || fd < minFd {
			continue
		}

		// Failures are deliberately ignored: they essentially mean the
		// descriptor has already been closed, which notably includes the
		// one ioutil.ReadDir itself opened to list the directory.
		syscall.CloseOnExec(fd)
	}
	return nil
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue