Adding initial version of C-based nsenter for allowing execin in
libcontainer. Docker-DCO-1.1-Signed-off-by: Victor Marmol <vmarmol@google.com> (github: vmarmol)
This commit is contained in:
parent
0a67ddd8c9
commit
985fe60646
3 changed files with 224 additions and 97 deletions
|
@ -3,119 +3,55 @@
|
|||
package namespaces
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/dotcloud/docker/pkg/label"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
)
|
||||
|
||||
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
|
||||
func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) {
|
||||
func ExecIn(container *libcontainer.Container, nspid int, args []string) error {
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := LoadContainerEnvironment(container); err != nil {
|
||||
return -1, err
|
||||
return err
|
||||
}
|
||||
|
||||
for key, enabled := range container.Namespaces {
|
||||
// skip the PID namespace on unshare because it is not supported
|
||||
if enabled && key != "NEWPID" {
|
||||
if ns := libcontainer.GetNamespace(key); ns != nil {
|
||||
if err := system.Unshare(ns.Value); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fds, err := getNsFds(nspid, container)
|
||||
closeFds := func() {
|
||||
for _, f := range fds {
|
||||
system.Closefd(f)
|
||||
}
|
||||
}
|
||||
// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
|
||||
// Marshal the container into JSON since it won't be available in the namespace.
|
||||
containerJson, err := json.Marshal(container)
|
||||
if err != nil {
|
||||
closeFds()
|
||||
return -1, err
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO(vmarmol): Move this to the container JSON.
|
||||
processLabel, err := label.GetPidCon(nspid)
|
||||
if err != nil {
|
||||
closeFds()
|
||||
return -1, err
|
||||
}
|
||||
// foreach namespace fd, use setns to join an existing container's namespaces
|
||||
for _, fd := range fds {
|
||||
if fd > 0 {
|
||||
if err := system.Setns(fd, 0); err != nil {
|
||||
closeFds()
|
||||
return -1, fmt.Errorf("setns %s", err)
|
||||
}
|
||||
}
|
||||
system.Closefd(fd)
|
||||
return err
|
||||
}
|
||||
|
||||
// if the container has a new pid and mount namespace we need to
|
||||
// remount proc and sys to pick up the changes
|
||||
if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] {
|
||||
pid, err := system.Fork()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if pid == 0 {
|
||||
// TODO: make all raw syscalls to be fork safe
|
||||
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if err := mount.RemountProc(); err != nil {
|
||||
return -1, fmt.Errorf("remount proc %s", err)
|
||||
}
|
||||
if err := mount.RemountSys(); err != nil {
|
||||
return -1, fmt.Errorf("remount sys %s", err)
|
||||
}
|
||||
goto dropAndExec
|
||||
}
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
state, err := proc.Wait()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus())
|
||||
}
|
||||
dropAndExec:
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
err = label.SetProcessLabel(processLabel)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
|
||||
return -1, err
|
||||
// Enter the namespace and then finish setup
|
||||
finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)}
|
||||
finalArgs = append(finalArgs, args...)
|
||||
if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil {
|
||||
return err
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) {
|
||||
fds := []uintptr{}
|
||||
|
||||
for key, enabled := range container.Namespaces {
|
||||
if enabled {
|
||||
if ns := libcontainer.GetNamespace(key); ns != nil {
|
||||
f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return fds, err
|
||||
}
|
||||
fds = append(fds, f.Fd())
|
||||
}
|
||||
}
|
||||
// NsEnter is run after entering the namespace.
|
||||
func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error {
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
return fds, nil
|
||||
if err := label.SetProcessLabel(processLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
|
||||
return err
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
|
142
libcontainer/namespaces/nsenter.go
Normal file
142
libcontainer/namespaces/nsenter.go
Normal file
|
@ -0,0 +1,142 @@
|
|||
package namespaces
|
||||
|
||||
/*
|
||||
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
|
||||
|
||||
// kBufSize is both the chunk size used when growing the command-line buffer
// and the size of the fixed path buffers. It is an enum constant rather than
// a const variable so that it is a true integer constant expression: arrays
// declared with it are ordinary fixed-size arrays, not VLAs.
enum { kBufSize = 256 };
|
||||
|
||||
void get_args(int *argc, char ***argv) {
|
||||
// Read argv
|
||||
int fd = open("/proc/self/cmdline", O_RDONLY);
|
||||
|
||||
// Read the whole commandline.
|
||||
ssize_t contents_size = 0;
|
||||
ssize_t contents_offset = 0;
|
||||
char *contents = NULL;
|
||||
ssize_t bytes_read = 0;
|
||||
do {
|
||||
contents_size += kBufSize;
|
||||
contents = (char *) realloc(contents, contents_size);
|
||||
bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset);
|
||||
contents_offset += bytes_read;
|
||||
} while (bytes_read > 0);
|
||||
close(fd);
|
||||
|
||||
// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
|
||||
ssize_t i;
|
||||
*argc = 0;
|
||||
for (i = 0; i < contents_offset; i++) {
|
||||
if (contents[i] == '\0') {
|
||||
(*argc)++;
|
||||
}
|
||||
}
|
||||
*argv = (char **) malloc(sizeof(char *) * ((*argc) + 1));
|
||||
int idx;
|
||||
for (idx = 0; idx < (*argc); idx++) {
|
||||
(*argv)[idx] = contents;
|
||||
contents += strlen(contents) + 1;
|
||||
}
|
||||
(*argv)[*argc] = NULL;
|
||||
}
|
||||
|
||||
void nsenter() {
|
||||
int argc;
|
||||
char **argv;
|
||||
get_args(&argc, &argv);
|
||||
|
||||
// Ignore if this is not for us.
|
||||
if (argc < 2 || strcmp(argv[1], "nsenter") != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>...
|
||||
if (argc < 6) {
|
||||
fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n");
|
||||
exit(1);
|
||||
}
|
||||
pid_t init_pid = strtol(argv[2], NULL, 10);
|
||||
if (errno != 0 || init_pid <= 0) {
|
||||
fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
argc -= 3;
|
||||
argv += 3;
|
||||
|
||||
// Setns on all supported namespaces.
|
||||
char ns_dir[kBufSize];
|
||||
memset(ns_dir, 0, kBufSize);
|
||||
if (snprintf(ns_dir, kBufSize - 1, "/proc/%d/ns/", init_pid) < 0) {
|
||||
fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
struct dirent *dent;
|
||||
DIR *dir = opendir(ns_dir);
|
||||
if (dir == NULL) {
|
||||
fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
while((dent = readdir(dir)) != NULL) {
|
||||
if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get and open the namespace for the init we are joining..
|
||||
char buf[kBufSize];
|
||||
memset(buf, 0, kBufSize);
|
||||
strncat(buf, ns_dir, kBufSize - 1);
|
||||
strncat(buf, dent->d_name, kBufSize - 1);
|
||||
int fd = open(buf, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Set the namespace.
|
||||
if (setns(fd, 0) == -1) {
|
||||
fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
// We must fork to actually enter the PID namespace.
|
||||
int child = fork();
|
||||
if (child == 0) {
|
||||
// Finish executing, let the Go runtime take over.
|
||||
return;
|
||||
} else {
|
||||
// Parent, wait for the child.
|
||||
int status = 0;
|
||||
if (waitpid(child, &status, 0) == -1) {
|
||||
fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Forward the child's exit code or re-send its death signal.
|
||||
if (WIFEXITED(status)) {
|
||||
exit(WEXITSTATUS(status));
|
||||
} else if (WIFSIGNALED(status)) {
|
||||
kill(getpid(), WTERMSIG(status));
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// init is registered as a constructor, so it runs when the binary is loaded,
// before main. It gives nsenter() the chance to join the target namespaces
// before the rest of the program starts.
__attribute__((constructor)) void init(void) {
	nsenter();
}
|
||||
*/
|
||||
import "C"
|
|
@ -27,20 +27,20 @@ func main() {
|
|||
log.Fatalf("invalid number of arguments %d", len(os.Args))
|
||||
}
|
||||
|
||||
container, err := loadContainer()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
switch os.Args[1] {
|
||||
case "exec": // this is executed outside of the namespace in the cwd
|
||||
container, err := loadContainer()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
var nspid, exitCode int
|
||||
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
|
||||
log.Fatalf("unable to read pid: %s", err)
|
||||
}
|
||||
|
||||
if nspid > 0 {
|
||||
exitCode, err = namespaces.ExecIn(container, nspid, os.Args[2:])
|
||||
err = namespaces.ExecIn(container, nspid, os.Args[2:])
|
||||
} else {
|
||||
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
|
||||
exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
|
||||
|
@ -50,7 +50,36 @@ func main() {
|
|||
log.Fatalf("failed to exec: %s", err)
|
||||
}
|
||||
os.Exit(exitCode)
|
||||
case "nsenter": // this is executed inside the namespace.
|
||||
// nsinit nsenter <pid> <process label> <container JSON> <cmd>...
|
||||
if len(os.Args) < 6 {
|
||||
log.Fatalf("incorrect usage: nsinit nsenter <pid> <process label> <container JSON> <cmd>...")
|
||||
}
|
||||
|
||||
container, err := loadContainerFromJson(os.Args[4])
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
nspid, err := strconv.Atoi(os.Args[2])
|
||||
if err != nil {
|
||||
log.Fatalf("unable to read pid: %s from %q", err, os.Args[2])
|
||||
}
|
||||
|
||||
if nspid <= 0 {
|
||||
log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid)
|
||||
}
|
||||
|
||||
err = namespaces.NsEnter(container, os.Args[3], nspid, os.Args[5:])
|
||||
if err != nil {
|
||||
log.Fatalf("failed to nsenter: %s", err)
|
||||
}
|
||||
case "init": // this is executed inside of the namespace to setup the container
|
||||
container, err := loadContainer()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
// by default our current dir is always our rootfs
|
||||
rootfs, err := os.Getwd()
|
||||
if err != nil {
|
||||
|
@ -70,6 +99,11 @@ func main() {
|
|||
log.Fatalf("unable to initialize for container: %s", err)
|
||||
}
|
||||
case "stats":
|
||||
container, err := loadContainer()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
// returns the stats of the current container.
|
||||
stats, err := getContainerStats(container)
|
||||
if err != nil {
|
||||
|
@ -80,6 +114,11 @@ func main() {
|
|||
os.Exit(0)
|
||||
|
||||
case "spec":
|
||||
container, err := loadContainer()
|
||||
if err != nil {
|
||||
log.Fatalf("unable to load container: %s", err)
|
||||
}
|
||||
|
||||
// returns the spec of the current container.
|
||||
spec, err := getContainerSpec(container)
|
||||
if err != nil {
|
||||
|
@ -90,13 +129,14 @@ func main() {
|
|||
os.Exit(0)
|
||||
|
||||
default:
|
||||
log.Fatalf("command not supported for nsinit %s", os.Args[0])
|
||||
log.Fatalf("command not supported for nsinit %s", os.Args[1])
|
||||
}
|
||||
}
|
||||
|
||||
func loadContainer() (*libcontainer.Container, error) {
|
||||
f, err := os.Open(filepath.Join(dataPath, "container.json"))
|
||||
if err != nil {
|
||||
log.Printf("Path: %q", filepath.Join(dataPath, "container.json"))
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
@ -108,6 +148,15 @@ func loadContainer() (*libcontainer.Container, error) {
|
|||
return container, nil
|
||||
}
|
||||
|
||||
func loadContainerFromJson(rawData string) (*libcontainer.Container, error) {
|
||||
container := &libcontainer.Container{}
|
||||
err := json.Unmarshal([]byte(rawData), container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return container, nil
|
||||
}
|
||||
|
||||
func readPid() (int, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
|
||||
if err != nil {
|
||||
|
|
Loading…
Reference in a new issue