Adding initial version of C-based nsenter for allowing execin in

libcontainer.

Docker-DCO-1.1-Signed-off-by: Victor Marmol <vmarmol@google.com> (github: vmarmol)
This commit is contained in:
Victor Marmol 2014-05-24 01:06:14 +00:00
parent 0a67ddd8c9
commit 985fe60646
3 changed files with 224 additions and 97 deletions

View file

@ -3,119 +3,55 @@
package namespaces
import (
"fmt"
"encoding/json"
"os"
"path/filepath"
"strconv"
"syscall"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/system"
)
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) {
func ExecIn(container *libcontainer.Container, nspid int, args []string) error {
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return -1, err
return err
}
for key, enabled := range container.Namespaces {
// skip the PID namespace on unshare because it is not supported
if enabled && key != "NEWPID" {
if ns := libcontainer.GetNamespace(key); ns != nil {
if err := system.Unshare(ns.Value); err != nil {
return -1, err
}
}
}
}
fds, err := getNsFds(nspid, container)
closeFds := func() {
for _, f := range fds {
system.Closefd(f)
}
}
// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
// Marshal the container into JSON since it won't be available in the namespace.
containerJson, err := json.Marshal(container)
if err != nil {
closeFds()
return -1, err
return err
}
// TODO(vmarmol): Move this to the container JSON.
processLabel, err := label.GetPidCon(nspid)
if err != nil {
closeFds()
return -1, err
}
// foreach namespace fd, use setns to join an existing container's namespaces
for _, fd := range fds {
if fd > 0 {
if err := system.Setns(fd, 0); err != nil {
closeFds()
return -1, fmt.Errorf("setns %s", err)
}
}
system.Closefd(fd)
return err
}
// if the container has a new pid and mount namespace we need to
// remount proc and sys to pick up the changes
if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] {
pid, err := system.Fork()
if err != nil {
return -1, err
}
if pid == 0 {
// TODO: make all raw syscalls to be fork safe
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
return -1, err
}
if err := mount.RemountProc(); err != nil {
return -1, fmt.Errorf("remount proc %s", err)
}
if err := mount.RemountSys(); err != nil {
return -1, fmt.Errorf("remount sys %s", err)
}
goto dropAndExec
}
proc, err := os.FindProcess(pid)
if err != nil {
return -1, err
}
state, err := proc.Wait()
if err != nil {
return -1, err
}
os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus())
}
dropAndExec:
if err := FinalizeNamespace(container); err != nil {
return -1, err
}
err = label.SetProcessLabel(processLabel)
if err != nil {
return -1, err
}
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
return -1, err
// Enter the namespace and then finish setup
finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)}
finalArgs = append(finalArgs, args...)
if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil {
return err
}
panic("unreachable")
}
func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) {
fds := []uintptr{}
for key, enabled := range container.Namespaces {
if enabled {
if ns := libcontainer.GetNamespace(key); ns != nil {
f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0)
if err != nil {
return fds, err
}
fds = append(fds, f.Fd())
}
}
// NsEnter is run after entering the namespace.
func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error {
if err := FinalizeNamespace(container); err != nil {
return err
}
return fds, nil
if err := label.SetProcessLabel(processLabel); err != nil {
return err
}
if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
return err
}
panic("unreachable")
}

View file

@ -0,0 +1,142 @@
package namespaces
/*
// _GNU_SOURCE must be defined before the first system header so that
// <sched.h> declares setns().
#define _GNU_SOURCE
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/sched.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// kBufSize is both the growth step used when reading /proc/self/cmdline and
// the size of the stack buffers that hold /proc paths.
static const int kBufSize = 256;
// get_args rebuilds the process argument vector from /proc/self/cmdline.
//
// On return *argc holds the number of arguments and *argv points to a
// NULL-terminated array of *argc strings. The strings point into a heap buffer
// that is intentionally never freed, because the vector must stay valid for
// the caller.
//
// If the command line cannot be read, *argc is 0 and *argv is an empty
// NULL-terminated vector (or NULL if even that allocation fails), so callers
// must check *argc before indexing *argv.
void get_args(int *argc, char ***argv) {
	*argc = 0;
	*argv = NULL;

	char *contents = NULL;
	ssize_t contents_size = 0;
	ssize_t contents_offset = 0;

	// Read the whole commandline.
	int fd = open("/proc/self/cmdline", O_RDONLY);
	if (fd != -1) {
		for (;;) {
			// Grow before the buffer is full. One byte is always held back so a
			// terminating NUL can be appended below without another realloc.
			if (contents_size - contents_offset < 2) {
				char *grown = (char *) realloc(contents, contents_size + kBufSize);
				if (grown == NULL) {
					contents_offset = 0;
					break;
				}
				contents = grown;
				contents_size += kBufSize;
			}

			ssize_t bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset - 1);
			if (bytes_read == -1 && errno == EINTR) {
				continue;
			}
			if (bytes_read == -1) {
				// A partially read command line is not trustworthy; report none.
				contents_offset = 0;
				break;
			}
			if (bytes_read == 0) {
				break;
			}
			contents_offset += bytes_read;
		}
		close(fd);
	}

	// The parsing below walks the buffer with strlen, so the final argument
	// must be NUL-terminated even if the kernel-provided data is not.
	if (contents_offset > 0 && contents[contents_offset - 1] != '\0') {
		contents[contents_offset++] = '\0';
	}

	// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
	int count = 0;
	ssize_t i;
	for (i = 0; i < contents_offset; i++) {
		if (contents[i] == '\0') {
			count++;
		}
	}

	char **vec = (char **) malloc(sizeof(char *) * (count + 1));
	if (vec == NULL) {
		free(contents);
		return;
	}

	char *cursor = contents;
	int idx;
	for (idx = 0; idx < count; idx++) {
		vec[idx] = cursor;
		cursor += strlen(cursor) + 1;
	}
	vec[count] = NULL;

	if (count == 0) {
		// Nothing references the buffer, so it can be released.
		free(contents);
	}

	*argc = count;
	*argv = vec;
}
// nsenter joins the namespaces of an existing process when the binary was
// started as:
//
//   <binary> nsenter <PID> <process label> <container JSON> <argv>...
//
// For any other command line it returns immediately without side effects.
//
// When it does apply, it calls setns() on every entry of /proc/<PID>/ns and
// then forks. The child returns to the caller; the parent waits for the child
// and exits with the child's exit code, or re-sends the signal that killed
// it. Any failure is reported on stderr and ends the process with status 1.
void nsenter(void) {
	int argc;
	char **argv;
	get_args(&argc, &argv);

	// Ignore if this is not for us.
	if (argc < 2 || strcmp(argv[1], "nsenter") != 0) {
		return;
	}

	// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>...
	if (argc < 6) {
		fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n");
		exit(1);
	}

	// strtol reports failure only through errno and the end pointer, so errno
	// has to be cleared first; a stale value would reject a valid PID.
	char *pid_end = NULL;
	errno = 0;
	long parsed_pid = strtol(argv[2], &pid_end, 10);
	if (errno == 0 &&
	    (pid_end == argv[2] || *pid_end != '\0' || parsed_pid <= 0 ||
	     (long) (pid_t) parsed_pid != parsed_pid)) {
		// Empty input, trailing garbage, non-positive or out-of-range PID.
		errno = EINVAL;
	}
	if (errno != 0) {
		fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno));
		exit(1);
	}
	pid_t init_pid = (pid_t) parsed_pid;

	// Setns on all supported namespaces.
	char ns_dir[kBufSize];
	memset(ns_dir, 0, kBufSize);
	int ns_dir_len = snprintf(ns_dir, kBufSize, "/proc/%d/ns/", init_pid);
	if (ns_dir_len >= kBufSize) {
		errno = ENAMETOOLONG;
	}
	if (ns_dir_len < 0 || ns_dir_len >= kBufSize) {
		fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno));
		exit(1);
	}

	struct dirent *dent;
	DIR *dir = opendir(ns_dir);
	if (dir == NULL) {
		fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno));
		exit(1);
	}

	// NOTE(review): every entry of the directory is joined, whatever its type;
	// confirm that setns() accepts all of them on the targeted kernels.
	while ((dent = readdir(dir)) != NULL) {
		if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) {
			continue;
		}

		// Open the namespace file relative to the already-open directory.
		// Once the mount namespace has been joined, an absolute /proc/<PID>
		// path would be resolved inside the target's mount tree and could
		// name a different process or nothing at all.
		int fd = openat(dirfd(dir), dent->d_name, O_RDONLY);
		if (fd == -1) {
			fprintf(stderr, "nsenter: Failed to open ns file \"%s%s\" for ns \"%s\" with error: \"%s\"\n", ns_dir, dent->d_name, dent->d_name, strerror(errno));
			exit(1);
		}

		// Set the namespace.
		if (setns(fd, 0) == -1) {
			fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno));
			exit(1);
		}
		close(fd);
	}
	closedir(dir);

	// We must fork to actually enter the PID namespace.
	pid_t child = fork();
	if (child == -1) {
		fprintf(stderr, "nsenter: Failed to fork with error: \"%s\"\n", strerror(errno));
		exit(1);
	}
	if (child == 0) {
		// Finish executing, let the Go runtime take over.
		return;
	}

	// Parent, wait for the child. An interrupted wait is simply retried.
	int status = 0;
	pid_t waited;
	do {
		waited = waitpid(child, &status, 0);
	} while (waited == -1 && errno == EINTR);
	if (waited == -1) {
		fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno));
		exit(1);
	}

	// Forward the child's exit code or re-send its death signal.
	if (WIFEXITED(status)) {
		exit(WEXITSTATUS(status));
	}
	if (WIFSIGNALED(status)) {
		kill(getpid(), WTERMSIG(status));
	}

	// Reached only if the status was neither of the above or the re-sent
	// signal did not terminate this process.
	exit(1);
}
// init is marked as a constructor so that it calls nsenter() when the binary
// is loaded, ahead of main(). nsenter() depends on this: it forks and lets
// the child continue into the Go runtime.
__attribute__((constructor)) void init(void) {
	nsenter();
}
*/
import "C"

View file

@ -27,20 +27,20 @@ func main() {
log.Fatalf("invalid number of arguments %d", len(os.Args))
}
container, err := loadContainer()
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
switch os.Args[1] {
case "exec": // this is executed outside of the namespace in the cwd
container, err := loadContainer()
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
var nspid, exitCode int
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
log.Fatalf("unable to read pid: %s", err)
}
if nspid > 0 {
exitCode, err = namespaces.ExecIn(container, nspid, os.Args[2:])
err = namespaces.ExecIn(container, nspid, os.Args[2:])
} else {
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
@ -50,7 +50,36 @@ func main() {
log.Fatalf("failed to exec: %s", err)
}
os.Exit(exitCode)
case "nsenter": // this is executed inside the namespace.
// nsinit nsenter <pid> <process label> <container JSON> <cmd>...
if len(os.Args) < 6 {
log.Fatalf("incorrect usage: nsinit nsenter <pid> <process label> <container JSON> <cmd>...")
}
container, err := loadContainerFromJson(os.Args[4])
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
nspid, err := strconv.Atoi(os.Args[2])
if err != nil {
log.Fatalf("unable to read pid: %s from %q", err, os.Args[2])
}
if nspid <= 0 {
log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid)
}
err = namespaces.NsEnter(container, os.Args[3], nspid, os.Args[5:])
if err != nil {
log.Fatalf("failed to nsenter: %s", err)
}
case "init": // this is executed inside of the namespace to setup the container
container, err := loadContainer()
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
// by default our current dir is always our rootfs
rootfs, err := os.Getwd()
if err != nil {
@ -70,6 +99,11 @@ func main() {
log.Fatalf("unable to initialize for container: %s", err)
}
case "stats":
container, err := loadContainer()
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
// returns the stats of the current container.
stats, err := getContainerStats(container)
if err != nil {
@ -80,6 +114,11 @@ func main() {
os.Exit(0)
case "spec":
container, err := loadContainer()
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
// returns the spec of the current container.
spec, err := getContainerSpec(container)
if err != nil {
@ -90,13 +129,14 @@ func main() {
os.Exit(0)
default:
log.Fatalf("command not supported for nsinit %s", os.Args[0])
log.Fatalf("command not supported for nsinit %s", os.Args[1])
}
}
func loadContainer() (*libcontainer.Container, error) {
f, err := os.Open(filepath.Join(dataPath, "container.json"))
if err != nil {
log.Printf("Path: %q", filepath.Join(dataPath, "container.json"))
return nil, err
}
defer f.Close()
@ -108,6 +148,15 @@ func loadContainer() (*libcontainer.Container, error) {
return container, nil
}
// loadContainerFromJson decodes rawData, a JSON document, into a freshly
// allocated libcontainer.Container. When decoding fails it returns a nil
// container together with the error from json.Unmarshal.
func loadContainerFromJson(rawData string) (*libcontainer.Container, error) {
	var parsed libcontainer.Container
	if decodeErr := json.Unmarshal([]byte(rawData), &parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return &parsed, nil
}
func readPid() (int, error) {
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
if err != nil {