Adding initial version of C-based nsenter for allowing execin in
libcontainer. Docker-DCO-1.1-Signed-off-by: Victor Marmol <vmarmol@google.com> (github: vmarmol)
This commit is contained in:
parent
0a67ddd8c9
commit
985fe60646
3 changed files with 224 additions and 97 deletions
|
@ -3,119 +3,55 @@
|
||||||
package namespaces
|
package namespaces
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"github.com/dotcloud/docker/pkg/label"
|
"github.com/dotcloud/docker/pkg/label"
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
|
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
|
||||||
func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) {
|
func ExecIn(container *libcontainer.Container, nspid int, args []string) error {
|
||||||
// clear the current processes env and replace it with the environment
|
// clear the current processes env and replace it with the environment
|
||||||
// defined on the container
|
// defined on the container
|
||||||
if err := LoadContainerEnvironment(container); err != nil {
|
if err := LoadContainerEnvironment(container); err != nil {
|
||||||
return -1, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for key, enabled := range container.Namespaces {
|
// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
|
||||||
// skip the PID namespace on unshare because it is not supported
|
// Marshal the container into JSON since it won't be available in the namespace.
|
||||||
if enabled && key != "NEWPID" {
|
containerJson, err := json.Marshal(container)
|
||||||
if ns := libcontainer.GetNamespace(key); ns != nil {
|
|
||||||
if err := system.Unshare(ns.Value); err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fds, err := getNsFds(nspid, container)
|
|
||||||
closeFds := func() {
|
|
||||||
for _, f := range fds {
|
|
||||||
system.Closefd(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
closeFds()
|
return err
|
||||||
return -1, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(vmarmol): Move this to the container JSON.
|
||||||
processLabel, err := label.GetPidCon(nspid)
|
processLabel, err := label.GetPidCon(nspid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
closeFds()
|
return err
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
// foreach namespace fd, use setns to join an existing container's namespaces
|
|
||||||
for _, fd := range fds {
|
|
||||||
if fd > 0 {
|
|
||||||
if err := system.Setns(fd, 0); err != nil {
|
|
||||||
closeFds()
|
|
||||||
return -1, fmt.Errorf("setns %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
system.Closefd(fd)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the container has a new pid and mount namespace we need to
|
// Enter the namespace and then finish setup
|
||||||
// remount proc and sys to pick up the changes
|
finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)}
|
||||||
if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] {
|
finalArgs = append(finalArgs, args...)
|
||||||
pid, err := system.Fork()
|
if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
if pid == 0 {
|
|
||||||
// TODO: make all raw syscalls to be fork safe
|
|
||||||
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
if err := mount.RemountProc(); err != nil {
|
|
||||||
return -1, fmt.Errorf("remount proc %s", err)
|
|
||||||
}
|
|
||||||
if err := mount.RemountSys(); err != nil {
|
|
||||||
return -1, fmt.Errorf("remount sys %s", err)
|
|
||||||
}
|
|
||||||
goto dropAndExec
|
|
||||||
}
|
|
||||||
proc, err := os.FindProcess(pid)
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
state, err := proc.Wait()
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus())
|
|
||||||
}
|
|
||||||
dropAndExec:
|
|
||||||
if err := FinalizeNamespace(container); err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
err = label.SetProcessLabel(processLabel)
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
}
|
||||||
panic("unreachable")
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
||||||
func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) {
|
// NsEnter is run after entering the namespace.
|
||||||
fds := []uintptr{}
|
func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error {
|
||||||
|
if err := FinalizeNamespace(container); err != nil {
|
||||||
for key, enabled := range container.Namespaces {
|
return err
|
||||||
if enabled {
|
|
||||||
if ns := libcontainer.GetNamespace(key); ns != nil {
|
|
||||||
f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0)
|
|
||||||
if err != nil {
|
|
||||||
return fds, err
|
|
||||||
}
|
|
||||||
fds = append(fds, f.Fd())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return fds, nil
|
if err := label.SetProcessLabel(processLabel); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
142
libcontainer/namespaces/nsenter.go
Normal file
142
libcontainer/namespaces/nsenter.go
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
package namespaces
|
||||||
|
|
||||||
|
/*
|
||||||
|
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
|
||||||
|
|
||||||
|
// kBufSize is the size, in bytes, of the fixed path buffers and the step by
// which the command-line read buffer grows.
static const int kBufSize = 256;
|
||||||
|
|
||||||
|
void get_args(int *argc, char ***argv) {
|
||||||
|
// Read argv
|
||||||
|
int fd = open("/proc/self/cmdline", O_RDONLY);
|
||||||
|
|
||||||
|
// Read the whole commandline.
|
||||||
|
ssize_t contents_size = 0;
|
||||||
|
ssize_t contents_offset = 0;
|
||||||
|
char *contents = NULL;
|
||||||
|
ssize_t bytes_read = 0;
|
||||||
|
do {
|
||||||
|
contents_size += kBufSize;
|
||||||
|
contents = (char *) realloc(contents, contents_size);
|
||||||
|
bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset);
|
||||||
|
contents_offset += bytes_read;
|
||||||
|
} while (bytes_read > 0);
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
|
||||||
|
ssize_t i;
|
||||||
|
*argc = 0;
|
||||||
|
for (i = 0; i < contents_offset; i++) {
|
||||||
|
if (contents[i] == '\0') {
|
||||||
|
(*argc)++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*argv = (char **) malloc(sizeof(char *) * ((*argc) + 1));
|
||||||
|
int idx;
|
||||||
|
for (idx = 0; idx < (*argc); idx++) {
|
||||||
|
(*argv)[idx] = contents;
|
||||||
|
contents += strlen(contents) + 1;
|
||||||
|
}
|
||||||
|
(*argv)[*argc] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nsenter() {
|
||||||
|
int argc;
|
||||||
|
char **argv;
|
||||||
|
get_args(&argc, &argv);
|
||||||
|
|
||||||
|
// Ignore if this is not for us.
|
||||||
|
if (argc < 2 || strcmp(argv[1], "nsenter") != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>...
|
||||||
|
if (argc < 6) {
|
||||||
|
fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
pid_t init_pid = strtol(argv[2], NULL, 10);
|
||||||
|
if (errno != 0 || init_pid <= 0) {
|
||||||
|
fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
argc -= 3;
|
||||||
|
argv += 3;
|
||||||
|
|
||||||
|
// Setns on all supported namespaces.
|
||||||
|
char ns_dir[kBufSize];
|
||||||
|
memset(ns_dir, 0, kBufSize);
|
||||||
|
if (snprintf(ns_dir, kBufSize - 1, "/proc/%d/ns/", init_pid) < 0) {
|
||||||
|
fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
struct dirent *dent;
|
||||||
|
DIR *dir = opendir(ns_dir);
|
||||||
|
if (dir == NULL) {
|
||||||
|
fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
while((dent = readdir(dir)) != NULL) {
|
||||||
|
if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get and open the namespace for the init we are joining..
|
||||||
|
char buf[kBufSize];
|
||||||
|
memset(buf, 0, kBufSize);
|
||||||
|
strncat(buf, ns_dir, kBufSize - 1);
|
||||||
|
strncat(buf, dent->d_name, kBufSize - 1);
|
||||||
|
int fd = open(buf, O_RDONLY);
|
||||||
|
if (fd == -1) {
|
||||||
|
fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the namespace.
|
||||||
|
if (setns(fd, 0) == -1) {
|
||||||
|
fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
closedir(dir);
|
||||||
|
|
||||||
|
// We must fork to actually enter the PID namespace.
|
||||||
|
int child = fork();
|
||||||
|
if (child == 0) {
|
||||||
|
// Finish executing, let the Go runtime take over.
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
// Parent, wait for the child.
|
||||||
|
int status = 0;
|
||||||
|
if (waitpid(child, &status, 0) == -1) {
|
||||||
|
fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward the child's exit code or re-send its death signal.
|
||||||
|
if (WIFEXITED(status)) {
|
||||||
|
exit(WEXITSTATUS(status));
|
||||||
|
} else if (WIFSIGNALED(status)) {
|
||||||
|
kill(getpid(), WTERMSIG(status));
|
||||||
|
}
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// init is marked as a constructor so that nsenter() is called automatically
// at load time rather than from an explicit call site.
__attribute__((constructor)) void init(void) {
	nsenter();
}
|
||||||
|
*/
|
||||||
|
import "C"
|
|
@ -27,20 +27,20 @@ func main() {
|
||||||
log.Fatalf("invalid number of arguments %d", len(os.Args))
|
log.Fatalf("invalid number of arguments %d", len(os.Args))
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := loadContainer()
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("unable to load container: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
switch os.Args[1] {
|
switch os.Args[1] {
|
||||||
case "exec": // this is executed outside of the namespace in the cwd
|
case "exec": // this is executed outside of the namespace in the cwd
|
||||||
|
container, err := loadContainer()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to load container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
var nspid, exitCode int
|
var nspid, exitCode int
|
||||||
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
|
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
|
||||||
log.Fatalf("unable to read pid: %s", err)
|
log.Fatalf("unable to read pid: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if nspid > 0 {
|
if nspid > 0 {
|
||||||
exitCode, err = namespaces.ExecIn(container, nspid, os.Args[2:])
|
err = namespaces.ExecIn(container, nspid, os.Args[2:])
|
||||||
} else {
|
} else {
|
||||||
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
|
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
|
||||||
exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
|
exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
|
||||||
|
@ -50,7 +50,36 @@ func main() {
|
||||||
log.Fatalf("failed to exec: %s", err)
|
log.Fatalf("failed to exec: %s", err)
|
||||||
}
|
}
|
||||||
os.Exit(exitCode)
|
os.Exit(exitCode)
|
||||||
|
case "nsenter": // this is executed inside the namespace.
|
||||||
|
// nsinit nsenter <pid> <process label> <container JSON> <cmd>...
|
||||||
|
if len(os.Args) < 6 {
|
||||||
|
log.Fatalf("incorrect usage: nsinit nsenter <pid> <process label> <container JSON> <cmd>...")
|
||||||
|
}
|
||||||
|
|
||||||
|
container, err := loadContainerFromJson(os.Args[4])
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to load container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nspid, err := strconv.Atoi(os.Args[2])
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to read pid: %s from %q", err, os.Args[2])
|
||||||
|
}
|
||||||
|
|
||||||
|
if nspid <= 0 {
|
||||||
|
log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = namespaces.NsEnter(container, os.Args[3], nspid, os.Args[5:])
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("failed to nsenter: %s", err)
|
||||||
|
}
|
||||||
case "init": // this is executed inside of the namespace to setup the container
|
case "init": // this is executed inside of the namespace to setup the container
|
||||||
|
container, err := loadContainer()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to load container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
// by default our current dir is always our rootfs
|
// by default our current dir is always our rootfs
|
||||||
rootfs, err := os.Getwd()
|
rootfs, err := os.Getwd()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -70,6 +99,11 @@ func main() {
|
||||||
log.Fatalf("unable to initialize for container: %s", err)
|
log.Fatalf("unable to initialize for container: %s", err)
|
||||||
}
|
}
|
||||||
case "stats":
|
case "stats":
|
||||||
|
container, err := loadContainer()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to load container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
// returns the stats of the current container.
|
// returns the stats of the current container.
|
||||||
stats, err := getContainerStats(container)
|
stats, err := getContainerStats(container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -80,6 +114,11 @@ func main() {
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
|
|
||||||
case "spec":
|
case "spec":
|
||||||
|
container, err := loadContainer()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unable to load container: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
// returns the spec of the current container.
|
// returns the spec of the current container.
|
||||||
spec, err := getContainerSpec(container)
|
spec, err := getContainerSpec(container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -90,13 +129,14 @@ func main() {
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
log.Fatalf("command not supported for nsinit %s", os.Args[0])
|
log.Fatalf("command not supported for nsinit %s", os.Args[1])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadContainer() (*libcontainer.Container, error) {
|
func loadContainer() (*libcontainer.Container, error) {
|
||||||
f, err := os.Open(filepath.Join(dataPath, "container.json"))
|
f, err := os.Open(filepath.Join(dataPath, "container.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.Printf("Path: %q", filepath.Join(dataPath, "container.json"))
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
@ -108,6 +148,15 @@ func loadContainer() (*libcontainer.Container, error) {
|
||||||
return container, nil
|
return container, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func loadContainerFromJson(rawData string) (*libcontainer.Container, error) {
|
||||||
|
container := &libcontainer.Container{}
|
||||||
|
err := json.Unmarshal([]byte(rawData), container)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return container, nil
|
||||||
|
}
|
||||||
|
|
||||||
func readPid() (int, error) {
|
func readPid() (int, error) {
|
||||||
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
|
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
Loading…
Reference in a new issue