diff --git a/Makefile b/Makefile index c99ef08..3352159 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ static: $(BINDIR) rootfs.go @echo "+ $@" CGO_ENABLED=1 go build -tags "$(BUILDTAGS) cgo static_build" \ -ldflags "-w -extldflags -static ${LDFLAGS}" -o bin/$(notdir $(IMAGE)) . - @sudo setcap cap_chown,cap_fowner,cap_dac_override,cap_setuid,cap_setgid+ep ./bin/$(notdir $(IMAGE)) + @sudo setcap cap_chown,cap_fowner,cap_dac_override+ep ./bin/$(notdir $(IMAGE)) @echo "Static container created at: ./bin/$(notdir $(IMAGE))" @echo "Run with ./bin/$(notdir $(IMAGE))" diff --git a/README.md b/README.md index ed51d3b..dc7b695 100644 --- a/README.md +++ b/README.md @@ -128,11 +128,3 @@ the right perms on the rootfs for the userns user** - **CAP_DAC_OVERRIDE**: symlinks **These can be dropped after the rootfs is unpacked and chowned.** - -------- - -**Caps for libcontainer** - -- **CAP_SETUID**, **CAP_SETGID**: so we can write to `uid_map`, `gid_map`, in - `nsexec.c` -See: http://man7.org/linux/man-pages/man7/user_namespaces.7.html diff --git a/main.go b/main.go index cbffe05..2fe302d 100644 --- a/main.go +++ b/main.go @@ -53,9 +53,6 @@ var ( hooks specs.Hooks hookflags stringSlice - remappedUID uint32 = 886432 - remappedGID uint32 = 886432 - debug bool version bool @@ -194,11 +191,29 @@ func main() { } // set the CgroupsPath as this user - user, err := user.CurrentUser() + u, err := user.CurrentUser() if err != nil { logrus.Fatal(err) } - spec.Linux.CgroupsPath = sPtr(user.Name) + spec.Linux.CgroupsPath = sPtr(u.Name) + + // setup UID mappings + spec.Linux.UIDMappings = []specs.IDMapping{ + { + HostID: uint32(u.Uid), + ContainerID: 0, + Size: 1, + }, + } + + // setup GID mappings + spec.Linux.GIDMappings = []specs.IDMapping{ + { + HostID: uint32(u.Gid), + ContainerID: 0, + Size: 1, + }, + } if err := unpackRootfs(spec); err != nil { logrus.Fatal(err) diff --git a/rootfs_ops.go b/rootfs_ops.go index 8a4ffed..2347817 100644 --- a/rootfs_ops.go +++ b/rootfs_ops.go @@ -6,7 +6,6 @@ import ( "os" "github.com/docker/docker/pkg/archive" - "github.com/docker/docker/pkg/idtools" "github.com/opencontainers/runtime-spec/specs-go" ) @@ -16,39 +15,12 @@ func unpackRootfs(spec *specs.Spec) error { return err } - if len(spec.Linux.UIDMappings) > 0 && len(spec.Linux.GIDMappings) > 0 { - if err := idtools.MkdirAs(defaultRootfsDir, 0755, int(spec.Linux.UIDMappings[0].HostID), int(spec.Linux.GIDMappings[0].HostID)); err != nil { - return err - } - } else { - if err := os.MkdirAll(defaultRootfsDir, 0755); err != nil { - return err - } - } - - uidMaps := []idtools.IDMap{} - gidMaps := []idtools.IDMap{} - for _, u := range spec.Linux.UIDMappings { - uidMaps = append(uidMaps, idtools.IDMap{ - ContainerID: int(u.ContainerID), - HostID: int(u.HostID), - Size: int(u.Size), - }) - } - - for _, g := range spec.Linux.GIDMappings { - gidMaps = append(gidMaps, idtools.IDMap{ - ContainerID: int(g.ContainerID), - HostID: int(g.HostID), - Size: int(g.Size), - }) + if err := os.MkdirAll(defaultRootfsDir, 0755); err != nil { + return err } r := bytes.NewReader(data) - if err := archive.Untar(r, defaultRootfsDir, &archive.TarOptions{ - UIDMaps: uidMaps, - GIDMaps: gidMaps, - }); err != nil { + if err := archive.Untar(r, defaultRootfsDir, nil); err != nil { return err } diff --git a/spec.go b/spec.go index 9d3d155..0db9475 100644 --- a/spec.go +++ b/spec.go @@ -100,20 +100,6 @@ var ( }, }, Linux: specs.Linux{ - UIDMappings: []specs.IDMapping{ - { - HostID: remappedUID, - ContainerID: 0, - Size: 46578392, - }, - }, - GIDMappings: []specs.IDMapping{ - { - HostID: remappedGID, - ContainerID: 0, - Size: 46578392, - }, - }, MaskedPaths: []string{ "/proc/kcore", "/proc/latency_stats", diff --git a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go index c7bdf1f..2b33d79 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go @@ -7,4 +7,5 @@ import "syscall" // GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building // with earlier versions func enableSetgroups(sys *syscall.SysProcAttr) { + sys.GidMappingsEnableSetgroups = false } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go index 0bde656..63c8f67 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -210,22 +210,23 @@ func setupUser(config *initConfig) error { return err } - var addGroups []int - if len(config.Config.AdditionalGroups) > 0 { - addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) - if err != nil { - return err - } - } + /* var addGroups []int + if len(config.Config.AdditionalGroups) > 0 { + addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) + if err != nil { + return err + } + }*/ // before we change to the container's user make sure that the processes STDIO // is correctly owned by the user that we are switching to. if err := fixStdioPermissions(execUser); err != nil { return err } - suppGroups := append(execUser.Sgids, addGroups...) - if err := syscall.Setgroups(suppGroups); err != nil { - return err - } + /* + suppGroups := append(execUser.Sgids, addGroups...) + if err := syscall.Setgroups(suppGroups); err != nil { + return err + }*/ if err := system.Setgid(execUser.Gid); err != nil { return err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 8f37d6c..33b482f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -27,19 +27,19 @@ struct clone_arg { * Reserve some space for clone() to locate arguments * and retcode in this place */ - char stack[4096] __attribute__((aligned(16))); - char stack_ptr[0]; + char stack[4096] __attribute__ ((aligned(16))); + char stack_ptr[0]; jmp_buf *env; }; struct nsenter_config { uint32_t cloneflags; - char *uidmap; - int uidmap_len; - char *gidmap; - int gidmap_len; - uint8_t is_setgroup; - int consolefd; + char *uidmap; + int uidmap_len; + char *gidmap; + int gidmap_len; + uint8_t is_setgroup; + int consolefd; }; // list of known message types we want to send to bootstrap program @@ -55,18 +55,18 @@ struct nsenter_config { // Use raw setns syscall for versions of glibc that don't include it // (namely glibc-2.12) #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 - #define _GNU_SOURCE - #include "syscall.h" - #if defined(__NR_setns) && !defined(SYS_setns) - #define SYS_setns __NR_setns - #endif +#define _GNU_SOURCE +#include "syscall.h" +#if defined(__NR_setns) && !defined(SYS_setns) +#define SYS_setns __NR_setns +#endif - #ifdef SYS_setns - int setns(int fd, int nstype) - { - return syscall(SYS_setns, fd, nstype); - } - #endif +#ifdef SYS_setns +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif #endif #define pr_perror(fmt, ...) \ @@ -74,18 +74,18 @@ struct nsenter_config { static int child_func(void *_arg) { - struct clone_arg *arg = (struct clone_arg *)_arg; - longjmp(*arg->env, 1); + struct clone_arg *arg = (struct clone_arg *)_arg; + longjmp(*arg->env, 1); } -static int clone_parent(jmp_buf *env, int flags) __attribute__((noinline)); -static int clone_parent(jmp_buf *env, int flags) +static int clone_parent(jmp_buf * env, int flags) __attribute__ ((noinline)); +static int clone_parent(jmp_buf * env, int flags) { struct clone_arg ca; - int child; + int child; ca.env = env; - child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags, + child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags, &ca); // On old kernels, CLONE_PARENT cannot work with CLONE_NEWPID, // unshare before clone to workaround this. @@ -94,7 +94,7 @@ static int clone_parent(jmp_buf *env, int flags) pr_perror("Unable to unshare namespaces"); return -1; } - child = clone(child_func, ca.stack_ptr, SIGCHLD | CLONE_PARENT, + child = clone(child_func, ca.stack_ptr, SIGCHLD | CLONE_PARENT, &ca); } return child; @@ -104,9 +104,9 @@ static int clone_parent(jmp_buf *env, int flags) // write pid to after nsexec finishes setting up the environment. static int get_init_pipe() { - char buf[PATH_MAX]; - char *initpipe; - int pipenum = -1; + char buf[PATH_MAX]; + char *initpipe; + int pipenum = -1; initpipe = getenv("_LIBCONTAINER_INITPIPE"); if (initpipe == NULL) { @@ -141,19 +141,19 @@ static int num_namespaces(char *nspaths) static uint32_t readint32(char *buf) { - return *(uint32_t *)buf; + return *(uint32_t *) buf; } static uint8_t readint8(char *buf) { - return *(uint8_t *)buf; + return *(uint8_t *) buf; } static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len) { - char buf[PATH_MAX]; - int len; - int fd; + char buf[PATH_MAX]; + int len; + int fd; len = snprintf(buf, sizeof(buf), pathfmt, pid); if (len < 0) { @@ -174,7 +174,7 @@ static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len) exit(1); } else if (len != map_len) { pr_perror("Failed to write data to %s (%d/%d)", - buf, len, map_len); + buf, len, map_len); close(fd); exit(1); } @@ -191,51 +191,63 @@ static void update_process_uidmap(int pid, char *map, int map_len) update_process_idmap("/proc/%d/uid_map", pid, map, map_len); } -static void update_process_gidmap(int pid, uint8_t is_setgroup, char *map, int map_len) +static void proc_setgroups_write(pid_t child_pid, char *str) +{ + char setgroups_path[PATH_MAX]; + int fd; + + snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups", + (long)child_pid); + + fd = open(setgroups_path, O_RDWR); + if (fd == -1) { + + /* We may be on a system that doesn't support + /proc/PID/setgroups. In that case, the file won't exist, + and the system won't impose the restrictions that Linux 3.19 + added. That's fine: we don't need to do anything in order + to permit 'gid_map' to be updated. + + However, if the error from open() was something other than + the ENOENT error that is expected for that case, let the + user know. */ + + if (errno != ENOENT) + pr_perror("failed to open %s: %s\n", setgroups_path, + strerror(errno)); + return; + } + + if (write(fd, str, strlen(str)) == -1) + pr_perror("failed to write %s: %s\n", setgroups_path, + strerror(errno)); + + close(fd); +} + +static void update_process_gidmap(int pid, uint8_t is_setgroup, char *map, + int map_len) { if ((map == NULL) || (map_len <= 0)) { return; } if (is_setgroup == 1) { - int fd; - int len; - char buf[PATH_MAX]; - - len = snprintf(buf, sizeof(buf), "/proc/%d/setgroups", pid); - if (len < 0) { - pr_perror("failed to get setgroups path for %d", pid); - exit(1); - } - - fd = open(buf, O_RDWR); - if (fd == -1) { - pr_perror("failed to open %s", buf); - exit(1); - } - if (write(fd, "allow", 5) != 5) { - // If the kernel is too old to support - // /proc/PID/setgroups, write will return - // ENOENT; this is OK. - if (errno != ENOENT) { - pr_perror("failed to write allow to %s", buf); - close(fd); - exit(1); - } - } - close(fd); + proc_setgroups_write(pid, "allow"); + } else { + /* For unprivileged users we need to write to setgroups first. */ + proc_setgroups_write(pid, "deny"); } update_process_idmap("/proc/%d/gid_map", pid, map, map_len); } - -static void start_child(int pipenum, jmp_buf *env, int syncpipe[2], - struct nsenter_config *config) +static void start_child(int pipenum, jmp_buf * env, int syncpipe[2], + struct nsenter_config *config) { - int len; - int childpid; - char buf[PATH_MAX]; + int len; + int childpid; + char buf[PATH_MAX]; uint8_t syncbyte = 1; // We must fork to actually enter the PID namespace, use CLONE_PARENT @@ -246,11 +258,11 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2], pr_perror("Unable to fork"); exit(1); } - // update uid_map and gid_map for the child process if they // were provided update_process_uidmap(childpid, config->uidmap, config->uidmap_len); - update_process_gidmap(childpid, config->is_setgroup, config->gidmap, config->gidmap_len); + update_process_gidmap(childpid, config->is_setgroup, config->gidmap, + config->gidmap_len); // Send the sync signal to the child close(syncpipe[0]); @@ -259,7 +271,6 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2], pr_perror("failed to write sync byte to child"); exit(1); } - // Send the child pid back to our parent len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", childpid); if ((len < 0) || (write(pipenum, buf, len) != len)) { @@ -271,12 +282,13 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2], exit(0); } -static struct nsenter_config process_nl_attributes(int pipenum, char *data, int data_size) +static struct nsenter_config process_nl_attributes(int pipenum, char *data, + int data_size) { - struct nsenter_config config = {0}; - struct nlattr *nlattr; - int payload_len; - int start = 0; + struct nsenter_config config = { 0 }; + struct nlattr *nlattr; + int payload_len; + int start = 0; config.consolefd = -1; while (start < data_size) { @@ -298,12 +310,12 @@ static struct nsenter_config process_nl_attributes(int pipenum, char *data, int } else if (nlattr->nla_type == NS_PATHS_ATTR) { // if custom namespaces are required, open all // descriptors and perform setns on them - int i, j; - int nslen = num_namespaces(data + start); - int fds[nslen]; - char *nslist[nslen]; - char *ns; - char *saveptr; + int i, j; + int nslen = num_namespaces(data + start); + int fds[nslen]; + char *nslist[nslen]; + char *ns; + char *saveptr; for (i = 0; i < nslen; i++) { char *str = NULL; @@ -328,19 +340,21 @@ static struct nsenter_config process_nl_attributes(int pipenum, char *data, int for (i = 0; i < nslen; i++) { if (setns(fds[i], 0) != 0) { - pr_perror("Failed to setns to %s", nslist[i]); + pr_perror("Failed to setns to %s", + nslist[i]); exit(1); } close(fds[i]); } } else if (nlattr->nla_type == UIDMAP_ATTR) { - config.uidmap = data + start; + config.uidmap = data + start; config.uidmap_len = payload_len; } else if (nlattr->nla_type == GIDMAP_ATTR) { - config.gidmap = data + start; + config.gidmap = data + start; config.gidmap_len = payload_len; } else if (nlattr->nla_type == SETGROUP_ATTR) { config.is_setgroup = readint8(data + start); + config.is_setgroup = 0; } else { pr_perror("Unknown netlink message type %d", nlattr->nla_type); @@ -363,10 +377,9 @@ void nsexec(void) if (pipenum == -1) { return; } - // Retrieve the netlink header struct nlmsghdr nl_msg_hdr; - int len; + int len; if ((len = read(pipenum, &nl_msg_hdr, NLMSG_HDRLEN)) != NLMSG_HDRLEN) { pr_perror("Invalid netlink header length %d", len); @@ -382,9 +395,8 @@ void nsexec(void) pr_perror("Unexpected msg type %d", nl_msg_hdr.nlmsg_type); exit(1); } - // Retrieve data - int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0); + int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0); char data[nl_total_size]; if ((len = read(pipenum, data, nl_total_size)) != nl_total_size) { @@ -393,10 +405,11 @@ void nsexec(void) exit(1); } - jmp_buf env; - int syncpipe[2] = {-1, -1}; - struct nsenter_config config = process_nl_attributes(pipenum, - data, nl_total_size); + jmp_buf env; + int syncpipe[2] = { -1, -1 }; + struct nsenter_config config = process_nl_attributes(pipenum, + data, + nl_total_size); // required clone_flags to be passed if (config.cloneflags == -1) { @@ -413,7 +426,7 @@ void nsexec(void) if (setjmp(env) == 1) { // Child uint8_t s = 0; - int consolefd = config.consolefd; + int consolefd = config.consolefd; // close the writing side of pipe close(syncpipe[1]); @@ -438,10 +451,12 @@ void nsexec(void) pr_perror("setgid failed"); exit(1); } - - if (setgroups(0, NULL) == -1) { - pr_perror("setgroups failed"); - exit(1); + + if (config.is_setgroup == 1) { + if (setgroups(0, NULL) == -1) { + pr_perror("setgroups failed"); + exit(1); + } } if (consolefd != -1) { @@ -462,11 +477,9 @@ void nsexec(void) exit(1); } } - // Finish executing, let the Go runtime take over. return; } - // Parent start_child(pipenum, &env, syncpipe, &config); } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go index 1a2ee0b..8c5a8ac 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -13,6 +13,7 @@ import ( "strconv" "syscall" + "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" @@ -226,6 +227,10 @@ func (p *initProcess) execSetns() error { func (p *initProcess) start() error { defer p.parentPipe.Close() + if logrus.GetLevel() == logrus.DebugLevel { + p.cmd.Stdout = os.Stdout + p.cmd.Stderr = os.Stderr + } err := p.cmd.Start() p.process.ops = p p.childPipe.Close() diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go index c7bdb60..b5d95ef 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go @@ -7,5 +7,5 @@ import "syscall" // Set the GidMappingsEnableSetgroups member to true, so the process's // setgroups proc entry wont be set to 'deny' if GidMappings are set func enableSetgroups(sys *syscall.SysProcAttr) { - sys.GidMappingsEnableSetgroups = true + sys.GidMappingsEnableSetgroups = false }