POC no setuid, setgid caps

Signed-off-by: Jess Frazelle <jess@mesosphere.com>
This commit is contained in:
Jess Frazelle 2016-04-17 21:13:18 -07:00
parent 69cba73cf6
commit 2b527491fe
10 changed files with 156 additions and 171 deletions

View file

@ -40,7 +40,7 @@ static: $(BINDIR) rootfs.go
@echo "+ $@" @echo "+ $@"
CGO_ENABLED=1 go build -tags "$(BUILDTAGS) cgo static_build" \ CGO_ENABLED=1 go build -tags "$(BUILDTAGS) cgo static_build" \
-ldflags "-w -extldflags -static ${LDFLAGS}" -o bin/$(notdir $(IMAGE)) . -ldflags "-w -extldflags -static ${LDFLAGS}" -o bin/$(notdir $(IMAGE)) .
@sudo setcap cap_chown,cap_fowner,cap_dac_override,cap_setuid,cap_setgid+ep ./bin/$(notdir $(IMAGE)) @sudo setcap cap_chown,cap_fowner,cap_dac_override+ep ./bin/$(notdir $(IMAGE))
@echo "Static container created at: ./bin/$(notdir $(IMAGE))" @echo "Static container created at: ./bin/$(notdir $(IMAGE))"
@echo "Run with ./bin/$(notdir $(IMAGE))" @echo "Run with ./bin/$(notdir $(IMAGE))"

View file

@ -128,11 +128,3 @@ the right perms on the rootfs for the userns user**
- **CAP_DAC_OVERRIDE**: symlinks - **CAP_DAC_OVERRIDE**: symlinks
**These can be dropped after the rootfs is unpacked and chowned.** **These can be dropped after the rootfs is unpacked and chowned.**
-------
**Caps for libcontainer**
- **CAP_SETUID**, **CAP_SETGID**: so we can write to `uid_map`, `gid_map`, in
`nsexec.c`
See: http://man7.org/linux/man-pages/man7/user_namespaces.7.html

25
main.go
View file

@ -53,9 +53,6 @@ var (
hooks specs.Hooks hooks specs.Hooks
hookflags stringSlice hookflags stringSlice
remappedUID uint32 = 886432
remappedGID uint32 = 886432
debug bool debug bool
version bool version bool
@ -194,11 +191,29 @@ func main() {
} }
// set the CgroupsPath as this user // set the CgroupsPath as this user
user, err := user.CurrentUser() u, err := user.CurrentUser()
if err != nil { if err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
spec.Linux.CgroupsPath = sPtr(user.Name) spec.Linux.CgroupsPath = sPtr(u.Name)
// setup UID mappings
spec.Linux.UIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Uid),
ContainerID: 0,
Size: 1,
},
}
// setup GID mappings
spec.Linux.GIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Gid),
ContainerID: 0,
Size: 1,
},
}
if err := unpackRootfs(spec); err != nil { if err := unpackRootfs(spec); err != nil {
logrus.Fatal(err) logrus.Fatal(err)

View file

@ -6,7 +6,6 @@ import (
"os" "os"
"github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/idtools"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
) )
@ -16,39 +15,12 @@ func unpackRootfs(spec *specs.Spec) error {
return err return err
} }
if len(spec.Linux.UIDMappings) > 0 && len(spec.Linux.GIDMappings) > 0 { if err := os.MkdirAll(defaultRootfsDir, 0755); err != nil {
if err := idtools.MkdirAs(defaultRootfsDir, 0755, int(spec.Linux.UIDMappings[0].HostID), int(spec.Linux.GIDMappings[0].HostID)); err != nil { return err
return err
}
} else {
if err := os.MkdirAll(defaultRootfsDir, 0755); err != nil {
return err
}
}
uidMaps := []idtools.IDMap{}
gidMaps := []idtools.IDMap{}
for _, u := range spec.Linux.UIDMappings {
uidMaps = append(uidMaps, idtools.IDMap{
ContainerID: int(u.ContainerID),
HostID: int(u.HostID),
Size: int(u.Size),
})
}
for _, g := range spec.Linux.GIDMappings {
gidMaps = append(gidMaps, idtools.IDMap{
ContainerID: int(g.ContainerID),
HostID: int(g.HostID),
Size: int(g.Size),
})
} }
r := bytes.NewReader(data) r := bytes.NewReader(data)
if err := archive.Untar(r, defaultRootfsDir, &archive.TarOptions{ if err := archive.Untar(r, defaultRootfsDir, nil); err != nil {
UIDMaps: uidMaps,
GIDMaps: gidMaps,
}); err != nil {
return err return err
} }

14
spec.go
View file

@ -100,20 +100,6 @@ var (
}, },
}, },
Linux: specs.Linux{ Linux: specs.Linux{
UIDMappings: []specs.IDMapping{
{
HostID: remappedUID,
ContainerID: 0,
Size: 46578392,
},
},
GIDMappings: []specs.IDMapping{
{
HostID: remappedGID,
ContainerID: 0,
Size: 46578392,
},
},
MaskedPaths: []string{ MaskedPaths: []string{
"/proc/kcore", "/proc/kcore",
"/proc/latency_stats", "/proc/latency_stats",

View file

@ -7,4 +7,5 @@ import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building // GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions // with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) { func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = false
} }

View file

@ -210,22 +210,23 @@ func setupUser(config *initConfig) error {
return err return err
} }
var addGroups []int /* var addGroups []int
if len(config.Config.AdditionalGroups) > 0 { if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil { if err != nil {
return err return err
} }
} }*/
// before we change to the container's user make sure that the processes STDIO // before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to. // is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil { if err := fixStdioPermissions(execUser); err != nil {
return err return err
} }
suppGroups := append(execUser.Sgids, addGroups...) /*
if err := syscall.Setgroups(suppGroups); err != nil { suppGroups := append(execUser.Sgids, addGroups...)
return err if err := syscall.Setgroups(suppGroups); err != nil {
} return err
}*/
if err := system.Setgid(execUser.Gid); err != nil { if err := system.Setgid(execUser.Gid); err != nil {
return err return err

View file

@ -27,19 +27,19 @@ struct clone_arg {
* Reserve some space for clone() to locate arguments * Reserve some space for clone() to locate arguments
* and retcode in this place * and retcode in this place
*/ */
char stack[4096] __attribute__((aligned(16))); char stack[4096] __attribute__ ((aligned(16)));
char stack_ptr[0]; char stack_ptr[0];
jmp_buf *env; jmp_buf *env;
}; };
struct nsenter_config { struct nsenter_config {
uint32_t cloneflags; uint32_t cloneflags;
char *uidmap; char *uidmap;
int uidmap_len; int uidmap_len;
char *gidmap; char *gidmap;
int gidmap_len; int gidmap_len;
uint8_t is_setgroup; uint8_t is_setgroup;
int consolefd; int consolefd;
}; };
// list of known message types we want to send to bootstrap program // list of known message types we want to send to bootstrap program
@ -55,18 +55,18 @@ struct nsenter_config {
// Use raw setns syscall for versions of glibc that don't include it // Use raw setns syscall for versions of glibc that don't include it
// (namely glibc-2.12) // (namely glibc-2.12)
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE #define _GNU_SOURCE
#include "syscall.h" #include "syscall.h"
#if defined(__NR_setns) && !defined(SYS_setns) #if defined(__NR_setns) && !defined(SYS_setns)
#define SYS_setns __NR_setns #define SYS_setns __NR_setns
#endif #endif
#ifdef SYS_setns #ifdef SYS_setns
int setns(int fd, int nstype) int setns(int fd, int nstype)
{ {
return syscall(SYS_setns, fd, nstype); return syscall(SYS_setns, fd, nstype);
} }
#endif #endif
#endif #endif
#define pr_perror(fmt, ...) \ #define pr_perror(fmt, ...) \
@ -74,18 +74,18 @@ struct nsenter_config {
static int child_func(void *_arg) static int child_func(void *_arg)
{ {
struct clone_arg *arg = (struct clone_arg *)_arg; struct clone_arg *arg = (struct clone_arg *)_arg;
longjmp(*arg->env, 1); longjmp(*arg->env, 1);
} }
static int clone_parent(jmp_buf *env, int flags) __attribute__((noinline)); static int clone_parent(jmp_buf * env, int flags) __attribute__ ((noinline));
static int clone_parent(jmp_buf *env, int flags) static int clone_parent(jmp_buf * env, int flags)
{ {
struct clone_arg ca; struct clone_arg ca;
int child; int child;
ca.env = env; ca.env = env;
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags, child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags,
&ca); &ca);
// On old kernels, CLONE_PARENT cannot work with CLONE_NEWPID, // On old kernels, CLONE_PARENT cannot work with CLONE_NEWPID,
// unshare before clone to workaround this. // unshare before clone to workaround this.
@ -94,7 +94,7 @@ static int clone_parent(jmp_buf *env, int flags)
pr_perror("Unable to unshare namespaces"); pr_perror("Unable to unshare namespaces");
return -1; return -1;
} }
child = clone(child_func, ca.stack_ptr, SIGCHLD | CLONE_PARENT, child = clone(child_func, ca.stack_ptr, SIGCHLD | CLONE_PARENT,
&ca); &ca);
} }
return child; return child;
@ -104,9 +104,9 @@ static int clone_parent(jmp_buf *env, int flags)
// write pid to after nsexec finishes setting up the environment. // write pid to after nsexec finishes setting up the environment.
static int get_init_pipe() static int get_init_pipe()
{ {
char buf[PATH_MAX]; char buf[PATH_MAX];
char *initpipe; char *initpipe;
int pipenum = -1; int pipenum = -1;
initpipe = getenv("_LIBCONTAINER_INITPIPE"); initpipe = getenv("_LIBCONTAINER_INITPIPE");
if (initpipe == NULL) { if (initpipe == NULL) {
@ -141,19 +141,19 @@ static int num_namespaces(char *nspaths)
static uint32_t readint32(char *buf) static uint32_t readint32(char *buf)
{ {
return *(uint32_t *)buf; return *(uint32_t *) buf;
} }
static uint8_t readint8(char *buf) static uint8_t readint8(char *buf)
{ {
return *(uint8_t *)buf; return *(uint8_t *) buf;
} }
static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len) static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len)
{ {
char buf[PATH_MAX]; char buf[PATH_MAX];
int len; int len;
int fd; int fd;
len = snprintf(buf, sizeof(buf), pathfmt, pid); len = snprintf(buf, sizeof(buf), pathfmt, pid);
if (len < 0) { if (len < 0) {
@ -174,7 +174,7 @@ static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len)
exit(1); exit(1);
} else if (len != map_len) { } else if (len != map_len) {
pr_perror("Failed to write data to %s (%d/%d)", pr_perror("Failed to write data to %s (%d/%d)",
buf, len, map_len); buf, len, map_len);
close(fd); close(fd);
exit(1); exit(1);
} }
@ -191,51 +191,63 @@ static void update_process_uidmap(int pid, char *map, int map_len)
update_process_idmap("/proc/%d/uid_map", pid, map, map_len); update_process_idmap("/proc/%d/uid_map", pid, map, map_len);
} }
static void update_process_gidmap(int pid, uint8_t is_setgroup, char *map, int map_len) static void proc_setgroups_write(pid_t child_pid, char *str)
{
char setgroups_path[PATH_MAX];
int fd;
snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups",
(long)child_pid);
fd = open(setgroups_path, O_RDWR);
if (fd == -1) {
/* We may be on a system that doesn't support
/proc/PID/setgroups. In that case, the file won't exist,
and the system won't impose the restrictions that Linux 3.19
added. That's fine: we don't need to do anything in order
to permit 'gid_map' to be updated.
However, if the error from open() was something other than
the ENOENT error that is expected for that case, let the
user know. */
if (errno != ENOENT)
pr_perror("failed to open %s: %s\n", setgroups_path,
strerror(errno));
return;
}
if (write(fd, str, strlen(str)) == -1)
pr_perror("failed to write %s: %s\n", setgroups_path,
strerror(errno));
close(fd);
}
static void update_process_gidmap(int pid, uint8_t is_setgroup, char *map,
int map_len)
{ {
if ((map == NULL) || (map_len <= 0)) { if ((map == NULL) || (map_len <= 0)) {
return; return;
} }
if (is_setgroup == 1) { if (is_setgroup == 1) {
int fd; proc_setgroups_write(pid, "allow");
int len; } else {
char buf[PATH_MAX]; /* For unprivileged users we need to write to setgroups first. */
proc_setgroups_write(pid, "deny");
len = snprintf(buf, sizeof(buf), "/proc/%d/setgroups", pid);
if (len < 0) {
pr_perror("failed to get setgroups path for %d", pid);
exit(1);
}
fd = open(buf, O_RDWR);
if (fd == -1) {
pr_perror("failed to open %s", buf);
exit(1);
}
if (write(fd, "allow", 5) != 5) {
// If the kernel is too old to support
// /proc/PID/setgroups, write will return
// ENOENT; this is OK.
if (errno != ENOENT) {
pr_perror("failed to write allow to %s", buf);
close(fd);
exit(1);
}
}
close(fd);
} }
update_process_idmap("/proc/%d/gid_map", pid, map, map_len); update_process_idmap("/proc/%d/gid_map", pid, map, map_len);
} }
static void start_child(int pipenum, jmp_buf * env, int syncpipe[2],
static void start_child(int pipenum, jmp_buf *env, int syncpipe[2], struct nsenter_config *config)
struct nsenter_config *config)
{ {
int len; int len;
int childpid; int childpid;
char buf[PATH_MAX]; char buf[PATH_MAX];
uint8_t syncbyte = 1; uint8_t syncbyte = 1;
// We must fork to actually enter the PID namespace, use CLONE_PARENT // We must fork to actually enter the PID namespace, use CLONE_PARENT
@ -246,11 +258,11 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2],
pr_perror("Unable to fork"); pr_perror("Unable to fork");
exit(1); exit(1);
} }
// update uid_map and gid_map for the child process if they // update uid_map and gid_map for the child process if they
// were provided // were provided
update_process_uidmap(childpid, config->uidmap, config->uidmap_len); update_process_uidmap(childpid, config->uidmap, config->uidmap_len);
update_process_gidmap(childpid, config->is_setgroup, config->gidmap, config->gidmap_len); update_process_gidmap(childpid, config->is_setgroup, config->gidmap,
config->gidmap_len);
// Send the sync signal to the child // Send the sync signal to the child
close(syncpipe[0]); close(syncpipe[0]);
@ -259,7 +271,6 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2],
pr_perror("failed to write sync byte to child"); pr_perror("failed to write sync byte to child");
exit(1); exit(1);
} }
// Send the child pid back to our parent // Send the child pid back to our parent
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", childpid); len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", childpid);
if ((len < 0) || (write(pipenum, buf, len) != len)) { if ((len < 0) || (write(pipenum, buf, len) != len)) {
@ -271,12 +282,13 @@ static void start_child(int pipenum, jmp_buf *env, int syncpipe[2],
exit(0); exit(0);
} }
static struct nsenter_config process_nl_attributes(int pipenum, char *data, int data_size) static struct nsenter_config process_nl_attributes(int pipenum, char *data,
int data_size)
{ {
struct nsenter_config config = {0}; struct nsenter_config config = { 0 };
struct nlattr *nlattr; struct nlattr *nlattr;
int payload_len; int payload_len;
int start = 0; int start = 0;
config.consolefd = -1; config.consolefd = -1;
while (start < data_size) { while (start < data_size) {
@ -298,12 +310,12 @@ static struct nsenter_config process_nl_attributes(int pipenum, char *data, int
} else if (nlattr->nla_type == NS_PATHS_ATTR) { } else if (nlattr->nla_type == NS_PATHS_ATTR) {
// if custom namespaces are required, open all // if custom namespaces are required, open all
// descriptors and perform setns on them // descriptors and perform setns on them
int i, j; int i, j;
int nslen = num_namespaces(data + start); int nslen = num_namespaces(data + start);
int fds[nslen]; int fds[nslen];
char *nslist[nslen]; char *nslist[nslen];
char *ns; char *ns;
char *saveptr; char *saveptr;
for (i = 0; i < nslen; i++) { for (i = 0; i < nslen; i++) {
char *str = NULL; char *str = NULL;
@ -328,19 +340,21 @@ static struct nsenter_config process_nl_attributes(int pipenum, char *data, int
for (i = 0; i < nslen; i++) { for (i = 0; i < nslen; i++) {
if (setns(fds[i], 0) != 0) { if (setns(fds[i], 0) != 0) {
pr_perror("Failed to setns to %s", nslist[i]); pr_perror("Failed to setns to %s",
nslist[i]);
exit(1); exit(1);
} }
close(fds[i]); close(fds[i]);
} }
} else if (nlattr->nla_type == UIDMAP_ATTR) { } else if (nlattr->nla_type == UIDMAP_ATTR) {
config.uidmap = data + start; config.uidmap = data + start;
config.uidmap_len = payload_len; config.uidmap_len = payload_len;
} else if (nlattr->nla_type == GIDMAP_ATTR) { } else if (nlattr->nla_type == GIDMAP_ATTR) {
config.gidmap = data + start; config.gidmap = data + start;
config.gidmap_len = payload_len; config.gidmap_len = payload_len;
} else if (nlattr->nla_type == SETGROUP_ATTR) { } else if (nlattr->nla_type == SETGROUP_ATTR) {
config.is_setgroup = readint8(data + start); config.is_setgroup = readint8(data + start);
config.is_setgroup = 0;
} else { } else {
pr_perror("Unknown netlink message type %d", pr_perror("Unknown netlink message type %d",
nlattr->nla_type); nlattr->nla_type);
@ -363,10 +377,9 @@ void nsexec(void)
if (pipenum == -1) { if (pipenum == -1) {
return; return;
} }
// Retrieve the netlink header // Retrieve the netlink header
struct nlmsghdr nl_msg_hdr; struct nlmsghdr nl_msg_hdr;
int len; int len;
if ((len = read(pipenum, &nl_msg_hdr, NLMSG_HDRLEN)) != NLMSG_HDRLEN) { if ((len = read(pipenum, &nl_msg_hdr, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
pr_perror("Invalid netlink header length %d", len); pr_perror("Invalid netlink header length %d", len);
@ -382,9 +395,8 @@ void nsexec(void)
pr_perror("Unexpected msg type %d", nl_msg_hdr.nlmsg_type); pr_perror("Unexpected msg type %d", nl_msg_hdr.nlmsg_type);
exit(1); exit(1);
} }
// Retrieve data // Retrieve data
int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0); int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0);
char data[nl_total_size]; char data[nl_total_size];
if ((len = read(pipenum, data, nl_total_size)) != nl_total_size) { if ((len = read(pipenum, data, nl_total_size)) != nl_total_size) {
@ -393,10 +405,11 @@ void nsexec(void)
exit(1); exit(1);
} }
jmp_buf env; jmp_buf env;
int syncpipe[2] = {-1, -1}; int syncpipe[2] = { -1, -1 };
struct nsenter_config config = process_nl_attributes(pipenum, struct nsenter_config config = process_nl_attributes(pipenum,
data, nl_total_size); data,
nl_total_size);
// required clone_flags to be passed // required clone_flags to be passed
if (config.cloneflags == -1) { if (config.cloneflags == -1) {
@ -413,7 +426,7 @@ void nsexec(void)
if (setjmp(env) == 1) { if (setjmp(env) == 1) {
// Child // Child
uint8_t s = 0; uint8_t s = 0;
int consolefd = config.consolefd; int consolefd = config.consolefd;
// close the writing side of pipe // close the writing side of pipe
close(syncpipe[1]); close(syncpipe[1]);
@ -438,10 +451,12 @@ void nsexec(void)
pr_perror("setgid failed"); pr_perror("setgid failed");
exit(1); exit(1);
} }
if (setgroups(0, NULL) == -1) { if (config.is_setgroup == 1) {
pr_perror("setgroups failed"); if (setgroups(0, NULL) == -1) {
exit(1); pr_perror("setgroups failed");
exit(1);
}
} }
if (consolefd != -1) { if (consolefd != -1) {
@ -462,11 +477,9 @@ void nsexec(void)
exit(1); exit(1);
} }
} }
// Finish executing, let the Go runtime take over. // Finish executing, let the Go runtime take over.
return; return;
} }
// Parent // Parent
start_child(pipenum, &env, syncpipe, &config); start_child(pipenum, &env, syncpipe, &config);
} }

View file

@ -13,6 +13,7 @@ import (
"strconv" "strconv"
"syscall" "syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
@ -226,6 +227,10 @@ func (p *initProcess) execSetns() error {
func (p *initProcess) start() error { func (p *initProcess) start() error {
defer p.parentPipe.Close() defer p.parentPipe.Close()
if logrus.GetLevel() == logrus.DebugLevel {
p.cmd.Stdout = os.Stdout
p.cmd.Stderr = os.Stderr
}
err := p.cmd.Start() err := p.cmd.Start()
p.process.ops = p p.process.ops = p
p.childPipe.Close() p.childPipe.Close()

View file

@ -7,5 +7,5 @@ import "syscall"
// Set the GidMappingsEnableSetgroups member to true, so the process's // Set the GidMappingsEnableSetgroups member to true, so the process's
// setgroups proc entry wont be set to 'deny' if GidMappings are set // setgroups proc entry wont be set to 'deny' if GidMappings are set
func enableSetgroups(sys *syscall.SysProcAttr) { func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = true sys.GidMappingsEnableSetgroups = false
} }