Revert "Switch to new vendor directory layout"

This reverts commit d5742209d3.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2016-03-17 16:10:24 -07:00
parent a21a956a2d
commit b4c901f34a
806 changed files with 7 additions and 2 deletions

View file

@ -1,25 +0,0 @@
## nsenter
The `nsenter` package registers a special init constructor that is called before
the Go runtime has a chance to boot. This provides us the ability to `setns` on
existing namespaces and avoid the issues that the Go runtime has with multiple
threads. This constructor will be called if this package is registered,
imported, in your go application.
The `nsenter` package does `import "C"`, so it is built with [cgo](https://golang.org/cmd/cgo/).
In cgo, if the import of "C" is immediately preceded by a comment, that comment,
called the preamble, is used as a header when compiling the C parts of the package.
So every time we import package `nsenter`, the C code function `nsexec()` would be
called. And package `nsenter` is now only imported in Docker execdriver, so every time
before we call `execdriver.Exec()`, that C code would run.
`nsexec()` will first check the environment variable `_LIBCONTAINER_INITPID`
which will give the process of the container that should be joined. Namespaces fd will
be found from `/proc/[pid]/ns` and set by `setns` syscall.
It then gets the pipe number from `_LIBCONTAINER_INITPIPE`; error messages can
be transferred through it. If a tty is added, `_LIBCONTAINER_CONSOLE_PATH` will
have a value and a console is started for output.
Finally, `nsexec()` will clone a child process, exit the parent process, and let
the Go runtime take over.

View file

@ -1,12 +0,0 @@
// +build linux,!gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
// nsexec is defined outside this preamble (presumably in the package's C source).
extern void nsexec();
// The constructor attribute makes init() run when the binary is loaded,
// i.e. before the Go runtime has started.
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"

View file

@ -1,25 +0,0 @@
// +build linux,gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
// nsexec is defined outside this preamble (presumably in the package's C source).
extern void nsexec();
// The constructor attribute makes init() run when the binary is loaded,
// i.e. before the Go runtime has started.
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"
// AlwaysFalse is never set to true. It only guards the C.init() reference in
// this file's init function, and it is exported so the compiler cannot prove
// the guarded branch dead and optimize out that reference.
var AlwaysFalse bool
// init never actually invokes the C constructor from Go, because AlwaysFalse
// stays false. The guarded call exists only so that the C init symbol is
// referenced from Go code when building with gccgo.
func init() {
if AlwaysFalse {
// by referencing this C init() in a noop test, it will ensure the compiler
// links in the C function.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
C.init()
}
}

View file

@ -1,134 +0,0 @@
package nsenter
import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"strings"
"syscall"
"testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/vishvananda/netlink/nl"
)
// pid is the JSON message that TestNsenterValidPaths decodes from the parent
// end of the init pipe; Pid is then looked up with os.FindProcess.
type pid struct {
Pid int `json:"Pid"`
}
// TestNsenterValidPaths re-executes the test binary under the name
// "nsenter-exec" with an init pipe as fd 3, sends it a bootstrap message that
// asks it to join the current process's pid namespace, and expects a JSON pid
// reply followed by a zero exit status.
func TestNsenterValidPaths(t *testing.T) {
	args := []string{"nsenter-exec"}
	parent, child, err := newPipe()
	if err != nil {
		t.Fatalf("failed to create pipe %v", err)
	}
	defer parent.Close()

	namespaces := []string{
		// join pid ns of the current process
		fmt.Sprintf("/proc/%d/ns/pid", os.Getpid()),
	}
	cmd := &exec.Cmd{
		Path: os.Args[0],
		Args: args,
		// The first ExtraFiles entry becomes fd 3 in the started process,
		// matching _LIBCONTAINER_INITPIPE below.
		ExtraFiles: []*os.File{child},
		Env:        []string{"_LIBCONTAINER_INITPIPE=3"},
		Stdout:     os.Stdout,
		Stderr:     os.Stderr,
	}
	if err := cmd.Start(); err != nil {
		child.Close()
		t.Fatalf("nsenter failed to start %v", err)
	}
	// The started process has its own copy of the child end. Close ours so
	// that reads on parent see EOF instead of blocking forever if the helper
	// dies without replying.
	child.Close()

	// write cloneFlags
	r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
	r.AddData(&libcontainer.Int32msg{
		Type:  libcontainer.CloneFlagsAttr,
		Value: uint32(syscall.CLONE_NEWNET),
	})
	r.AddData(&libcontainer.Bytemsg{
		Type:  libcontainer.NsPathsAttr,
		Value: []byte(strings.Join(namespaces, ",")),
	})
	if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
		t.Fatal(err)
	}

	decoder := json.NewDecoder(parent)
	// Named reply rather than pid so the local does not shadow the pid type.
	var reply *pid
	if err := decoder.Decode(&reply); err != nil {
		// Log the namespaces available to this process to help diagnose
		// the failure.
		dir, _ := ioutil.ReadDir(fmt.Sprintf("/proc/%d/ns", os.Getpid()))
		for _, d := range dir {
			t.Log(d.Name())
		}
		t.Fatalf("%v", err)
	}
	if err := cmd.Wait(); err != nil {
		t.Fatalf("nsenter exits with a non-zero exit status")
	}

	p, err := os.FindProcess(reply.Pid)
	if err != nil {
		t.Fatalf("%v", err)
	}
	// Reaping the reported process is best effort: a failure is logged but
	// does not fail the test.
	if _, err := p.Wait(); err != nil {
		t.Logf("waiting for pid %d: %v", reply.Pid, err)
	}
}
// TestNsenterInvalidPaths re-executes the test binary under the name
// "nsenter-exec" with an init pipe as fd 3, sends it a bootstrap message whose
// namespace path cannot exist, and expects a non-zero exit status.
func TestNsenterInvalidPaths(t *testing.T) {
	args := []string{"nsenter-exec"}
	parent, child, err := newPipe()
	if err != nil {
		t.Fatalf("failed to create pipe %v", err)
	}
	defer parent.Close()

	namespaces := []string{
		// a pid ns path for pid -1, which cannot exist
		fmt.Sprintf("/proc/%d/ns/pid", -1),
	}
	cmd := &exec.Cmd{
		Path: os.Args[0],
		Args: args,
		// The first ExtraFiles entry becomes fd 3 in the started process,
		// matching _LIBCONTAINER_INITPIPE below.
		ExtraFiles: []*os.File{child},
		Env:        []string{"_LIBCONTAINER_INITPIPE=3"},
	}
	if err := cmd.Start(); err != nil {
		child.Close()
		t.Fatal(err)
	}
	// The started process has its own copy of the child end; release ours so
	// the descriptor is not leaked for the rest of the test run.
	child.Close()

	// write cloneFlags
	r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
	r.AddData(&libcontainer.Int32msg{
		Type:  libcontainer.CloneFlagsAttr,
		Value: uint32(syscall.CLONE_NEWNET),
	})
	r.AddData(&libcontainer.Bytemsg{
		Type:  libcontainer.NsPathsAttr,
		Value: []byte(strings.Join(namespaces, ",")),
	})
	if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
		t.Fatal(err)
	}

	if err := cmd.Wait(); err == nil {
		t.Fatalf("nsenter exits with a zero exit status")
	}
}
// init terminates the process with status 0 when the binary was started under
// a name beginning with "nsenter-", which is how the tests in this file
// re-execute the test binary as a helper. Otherwise it does nothing.
func init() {
	if name := os.Args[0]; strings.HasPrefix(name, "nsenter-") {
		os.Exit(0)
	}
}
// newPipe creates a connected pair of close-on-exec AF_LOCAL stream sockets
// and wraps each end in an *os.File. The file named "parent" wraps the second
// descriptor of the pair and the file named "child" wraps the first.
func newPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, sockErr := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if sockErr != nil {
		return nil, nil, sockErr
	}

	parentEnd := os.NewFile(uintptr(pair[1]), "parent")
	childEnd := os.NewFile(uintptr(pair[0]), "child")
	return parentEnd, childEnd, nil
}

View file

@ -1,5 +0,0 @@
// +build !linux !cgo
package nsenter
import "C"

View file

@ -1,460 +0,0 @@
#define _GNU_SOURCE
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <grp.h>
#include <bits/sockaddr.h>
#include <linux/types.h>
// All arguments should be above the stack because it grows down
//
// clone_arg is the argument block that clone_parent() hands to clone():
// stack_ptr marks the end of the stack buffer and is what gets passed as the
// child's stack pointer, and env is the jump buffer child_func() longjmp()s to.
struct clone_arg {
/*
* Reserve some space for clone() to locate arguments
* and retcode in this place
*/
char stack[4096] __attribute__((aligned(16)));
// Zero-length marker for the address just past stack[].
char stack_ptr[0];
jmp_buf *env;
};
// nsenter_config collects the settings parsed from the bootstrap netlink
// attributes by process_nl_attributes().
struct nsenter_config {
// Flags from CLONE_FLAGS_ATTR; passed to clone_parent().
uint32_t cloneflags;
// Payload of UIDMAP_ATTR (points into the netlink buffer) and its length.
char *uidmap;
int uidmap_len;
// Payload of GIDMAP_ATTR (points into the netlink buffer) and its length.
char *gidmap;
int gidmap_len;
// Value of SETGROUP_ATTR; when 1, "allow" is written to
// /proc/<pid>/setgroups before the gid map is written.
uint8_t is_setgroup;
};
// list of known message types we want to send to bootstrap program
// These are defined in libcontainer/message_linux.go
#define INIT_MSG 62000
#define CLONE_FLAGS_ATTR 27281
#define CONSOLE_PATH_ATTR 27282
#define NS_PATHS_ATTR 27283
#define UIDMAP_ATTR 27284
#define GIDMAP_ATTR 27285
#define SETGROUP_ATTR 27286
// Use raw setns syscall for versions of glibc that don't include it
// (namely glibc-2.12)
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"
// Map the __NR_ spelling of the syscall number to SYS_setns when only the
// former is defined.
#if defined(__NR_setns) && !defined(SYS_setns)
#define SYS_setns __NR_setns
#endif
#ifdef SYS_setns
// Fallback setns() wrapper that issues the raw system call.
int setns(int fd, int nstype)
{
return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
// pr_perror prints an "nsenter: "-prefixed, printf-style message to stderr,
// followed by ": " and the text for the current errno (%m is the glibc
// printf extension for strerror(errno)).
#define pr_perror(fmt, ...) \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
// child_func is the entry point given to clone(). It does not return:
// it longjmp()s to the jump buffer stored in the clone_arg, so the matching
// setjmp() call yields 1 in the new process.
static int child_func(void *_arg)
{
struct clone_arg *arg = (struct clone_arg *)_arg;
longjmp(*arg->env, 1);
}
// clone_parent creates a new process with clone(CLONE_PARENT | SIGCHLD | flags)
// that starts in child_func(), which jumps to env. If clone() fails with
// EINVAL, the namespaces in flags are unshared in the calling process instead
// and the clone is retried without them. Returns the value returned by
// clone(), or -1 if unshare() fails.
// NOTE(review): noinline is presumably there so that `ca` (which holds the
// child's stack) gets its own frame -- confirm.
static int clone_parent(jmp_buf *env, int flags) __attribute__((noinline));
static int clone_parent(jmp_buf *env, int flags)
{
struct clone_arg ca;
int child;
ca.env = env;
// ca.stack_ptr is the top of ca.stack; ca itself is the child's argument.
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags,
&ca);
if (child == -1 && errno == EINVAL) {
if (unshare(flags)) {
pr_perror("Unable to unshare namespaces");
return -1;
}
child = clone(child_func, ca.stack_ptr, SIGCHLD | CLONE_PARENT,
&ca);
}
return child;
}
// Look up the init pipe handed over by the parent in the
// _LIBCONTAINER_INITPIPE environment variable. The pipe is used to read the
// bootstrap data and to write the pid once nsexec has set up the environment.
//
// Returns the descriptor number, or -1 when the variable is not set. Exits
// with status 1 when the value does not round-trip through atoi()/"%d", i.e.
// is not a plain decimal integer.
static int get_init_pipe()
{
	const char *value = getenv("_LIBCONTAINER_INITPIPE");
	char roundtrip[PATH_MAX];
	int fd;

	if (!value)
		return -1;

	fd = atoi(value);
	snprintf(roundtrip, sizeof(roundtrip), "%d", fd);
	if (strcmp(value, roundtrip) != 0) {
		pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
		exit(1);
	}
	return fd;
}
// num_namespaces reports how many entries a comma-separated list of namespace
// paths has room for: one more than the number of ',' characters in nspaths.
// An empty string therefore counts as 1.
static int num_namespaces(char *nspaths)
{
	int count = 1;
	char *cursor;

	for (cursor = nspaths; *cursor != '\0'; cursor++)
		count += (*cursor == ',');

	return count;
}
// readint32 returns the 32-bit value stored in host byte order at buf.
// The bytes are copied out with memcpy rather than read through a cast
// pointer, so buf need not be 4-byte aligned and no uint32_t lvalue aliases
// the char buffer.
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
// readint8 returns the first byte of buf as an unsigned 8-bit value.
static uint8_t readint8(char *buf)
{
	return (uint8_t)buf[0];
}
// update_process_idmap writes map (map_len bytes) to the file whose path is
// built by formatting pathfmt with pid; pathfmt must contain exactly one %d
// conversion. Any failure (path construction or truncation, open, write, or
// short write) is reported on stderr and the process exits with status 1.
static void update_process_idmap(char *pathfmt, int pid, char *map, int map_len)
{
	char buf[PATH_MAX];
	int len;
	int fd;

	len = snprintf(buf, sizeof(buf), pathfmt, pid);
	// snprintf returns the untruncated length, so a result that does not
	// fit in buf means the path was cut short.
	if (len < 0 || (size_t)len >= sizeof(buf)) {
		pr_perror("failed to construct '%s' for %d", pathfmt, pid);
		exit(1);
	}

	fd = open(buf, O_RDWR);
	if (fd == -1) {
		pr_perror("failed to open %s", buf);
		exit(1);
	}

	len = write(fd, map, map_len);
	if (len == -1) {
		pr_perror("failed to write to %s", buf);
		exit(1);
	} else if (len != map_len) {
		// A short write is not an errno failure, so pr_perror is not
		// used; terminate the line explicitly.
		fprintf(stderr, "Failed to write data to %s (%d/%d)\n",
			buf, len, map_len);
		exit(1);
	}

	close(fd);
}
// update_process_uidmap writes map to /proc/<pid>/uid_map. It does nothing
// when map is NULL or map_len is not positive.
static void update_process_uidmap(int pid, char *map, int map_len)
{
	if (map != NULL && map_len > 0)
		update_process_idmap("/proc/%d/uid_map", pid, map, map_len);
}
// update_process_gidmap writes map to /proc/<pid>/gid_map. It does nothing
// when map is NULL or map_len is not positive. When is_setgroup is 1, "allow"
// is first written to /proc/<pid>/setgroups; a missing setgroups file
// (ENOENT) is tolerated, any other failure exits with status 1.
static void update_process_gidmap(int pid, uint8_t is_setgroup, char *map, int map_len)
{
	if ((map == NULL) || (map_len <= 0)) {
		return;
	}

	if (is_setgroup == 1) {
		int fd;
		int len;
		char buf[PATH_MAX];

		len = snprintf(buf, sizeof(buf), "/proc/%d/setgroups", pid);
		if (len < 0) {
			pr_perror("failed to get setgroups path for %d", pid);
			exit(1);
		}

		fd = open(buf, O_RDWR);
		if (fd == -1) {
			// If the kernel is too old to support
			// /proc/PID/setgroups, the file does not exist and
			// open fails with ENOENT; this is OK.
			if (errno != ENOENT) {
				pr_perror("failed to open %s", buf);
				exit(1);
			}
		} else {
			if (write(fd, "allow", 5) != 5) {
				// ENOENT from write is tolerated for the same
				// reason as above.
				if (errno != ENOENT) {
					pr_perror("failed to write allow to %s", buf);
					exit(1);
				}
			}
			close(fd);
		}
	}

	update_process_idmap("/proc/%d/gid_map", pid, map, map_len);
}
// start_child runs in the original process. It clones the child with
// clone_parent(), writes the child's uid/gid maps, releases the child by
// writing one sync byte to syncpipe[1], reports the child's pid as a JSON
// line on pipenum, and then exits. It never returns to its caller; the child
// does not continue here either, because child_func() longjmp()s to env.
static void start_child(int pipenum, jmp_buf *env, int syncpipe[2],
struct nsenter_config *config)
{
int len;
int childpid;
char buf[PATH_MAX];
uint8_t syncbyte = 1;
// We must fork to actually enter the PID namespace, use CLONE_PARENT
// so the child can have the right parent, and we don't need to forward
// the child's exit code or resend its death signal.
childpid = clone_parent(env, config->cloneflags);
if (childpid < 0) {
pr_perror("Unable to fork");
exit(1);
}
// update uid_map and gid_map for the child process if they
// were provided
update_process_uidmap(childpid, config->uidmap, config->uidmap_len);
update_process_gidmap(childpid, config->is_setgroup, config->gidmap, config->gidmap_len);
// Send the sync signal to the child
close(syncpipe[0]);
// syncbyte already holds 1 from its initializer; this assignment is redundant.
syncbyte = 1;
if (write(syncpipe[1], &syncbyte, 1) != 1) {
pr_perror("failed to write sync byte to child");
exit(1);
}
// Send the child pid back to our parent
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", childpid);
if ((len < 0) || (write(pipenum, buf, len) != len)) {
pr_perror("Unable to send a child pid");
// The pid could not be reported, so the child is killed too.
kill(childpid, SIGKILL);
exit(1);
}
exit(0);
}
// process_nl_attributes walks the netlink attributes in data (data_size
// bytes), applies them, and then splits into parent and child:
//
//   - CLONE_FLAGS_ATTR, UIDMAP_ATTR, GIDMAP_ATTR and SETGROUP_ATTR are stored
//     in the nsenter_config;
//   - CONSOLE_PATH_ATTR opens the console;
//   - NS_PATHS_ATTR opens every listed namespace path and setns()es into each;
//   - any other attribute type is an error.
//
// The calling process then goes through start_child(), which clones the child
// and exits. The child resumes at the setjmp() below, waits for the parent's
// sync byte, becomes a session leader with uid/gid 0 and no supplementary
// groups, attaches the console (if any) to stdin/stdout/stderr and returns so
// the Go runtime can start. Every failure exits with status 1.
static void process_nl_attributes(int pipenum, char *data, int data_size)
{
	jmp_buf env;
	struct nsenter_config config = {0};
	struct nlattr *nlattr;
	int payload_len;
	int start = 0;
	int consolefd = -1;
	int syncpipe[2] = {-1, -1};

	// Mark clone flags as "not received yet". Without this the
	// zero-initialized field could never equal the sentinel tested after
	// the loop, and a missing CLONE_FLAGS_ATTR would go unnoticed.
	config.cloneflags = (uint32_t)-1;

	while (start < data_size) {
		nlattr = (struct nlattr *)(data + start);
		start += NLA_HDRLEN;
		payload_len = nlattr->nla_len - NLA_HDRLEN;

		if (nlattr->nla_type == CLONE_FLAGS_ATTR) {
			config.cloneflags = readint32(data + start);
		} else if (nlattr->nla_type == CONSOLE_PATH_ATTR) {
			// get the console path before setns because it may
			// change mnt namespace
			consolefd = open(data + start, O_RDWR);
			if (consolefd < 0) {
				pr_perror("Failed to open console %s",
					  data + start);
				exit(1);
			}
		} else if (nlattr->nla_type == NS_PATHS_ATTR) {
			// if custom namespaces are required, open all
			// descriptors and perform setns on them
			int i;
			int nslen = num_namespaces(data + start);
			// Number of paths actually opened. This can be less
			// than nslen because strtok_r skips empty fields.
			int nsopened = 0;
			int fds[nslen];
			char *nslist[nslen];
			char *ns;
			char *saveptr;

			for (i = 0; i < nslen; i++) {
				char *str = NULL;

				if (i == 0) {
					str = data + start;
				}
				ns = strtok_r(str, ",", &saveptr);
				if (ns == NULL) {
					break;
				}
				fds[i] = open(ns, O_RDONLY);
				if (fds[i] == -1) {
					pr_perror("Failed to open %s", ns);
					exit(1);
				}
				nslist[i] = ns;
				nsopened++;
			}

			// All paths are opened before the first setns, then
			// joined in order. Only entries that were really
			// opened are touched.
			for (i = 0; i < nsopened; i++) {
				if (setns(fds[i], 0) != 0) {
					pr_perror("Failed to setns to %s", nslist[i]);
					exit(1);
				}
				close(fds[i]);
			}
		} else if (nlattr->nla_type == UIDMAP_ATTR) {
			config.uidmap = data + start;
			config.uidmap_len = payload_len;
		} else if (nlattr->nla_type == GIDMAP_ATTR) {
			config.gidmap = data + start;
			config.gidmap_len = payload_len;
		} else if (nlattr->nla_type == SETGROUP_ATTR) {
			config.is_setgroup = readint8(data + start);
		} else {
			pr_perror("Unknown netlink message type %d",
				  nlattr->nla_type);
			exit(1);
		}

		start += NLA_ALIGN(payload_len);
	}

	// required clone_flags to be passed
	if (config.cloneflags == (uint32_t)-1) {
		pr_perror("Missing clone_flags");
		exit(1);
	}

	// prepare sync pipe between parent and child. We need this to let the
	// child know that the parent has finished setting up
	if (pipe(syncpipe) != 0) {
		pr_perror("Failed to setup sync pipe between parent and child");
		exit(1);
	}

	if (setjmp(env) == 1) {
		// Child
		uint8_t s = 0;

		// close the writing side of pipe
		close(syncpipe[1]);

		// sync with parent
		if ((read(syncpipe[0], &s, 1) != 1) || (s != 1)) {
			pr_perror("Failed to read sync byte from parent");
			exit(1);
		}

		if (setsid() == -1) {
			pr_perror("setsid failed");
			exit(1);
		}
		if (setuid(0) == -1) {
			pr_perror("setuid failed");
			exit(1);
		}
		if (setgid(0) == -1) {
			pr_perror("setgid failed");
			exit(1);
		}
		if (setgroups(0, NULL) == -1) {
			pr_perror("setgroups failed");
			exit(1);
		}

		if (consolefd != -1) {
			// Make the console the controlling terminal, then
			// use it for all three standard streams.
			if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
				pr_perror("ioctl TIOCSCTTY failed");
				exit(1);
			}
			if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
				pr_perror("Failed to dup stdin");
				exit(1);
			}
			if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
				pr_perror("Failed to dup stdout");
				exit(1);
			}
			if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
				pr_perror("Failed to dup stderr");
				exit(1);
			}
		}

		// Finish executing, let the Go runtime take over.
		return;
	}

	// Parent
	start_child(pipenum, &env, syncpipe, &config);
}
void nsexec(void)
{
int pipenum;
// if we dont have init pipe, then just return to the parent
pipenum = get_init_pipe();
if (pipenum == -1) {
return;
}
// Retrieve the netlink header
struct nlmsghdr nl_msg_hdr;
int len;
if ((len = read(pipenum, &nl_msg_hdr, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
pr_perror("Invalid netlink header length %d", len);
exit(1);
}
if (nl_msg_hdr.nlmsg_type == NLMSG_ERROR) {
pr_perror("Failed to read netlink message");
exit(1);
}
if (nl_msg_hdr.nlmsg_type != INIT_MSG) {
pr_perror("Unexpected msg type %d", nl_msg_hdr.nlmsg_type);
exit(1);
}
// Retrieve data
int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0);
char data[nl_total_size];
if ((len = read(pipenum, data, nl_total_size)) != nl_total_size) {
pr_perror("Failed to read netlink payload, %d != %d", len,
nl_total_size);
exit(1);
}
process_nl_attributes(pipenum, data, nl_total_size);
}