initial commit

This commit is contained in:
Vincent Batts 2014-08-08 22:36:39 -04:00
commit a4395189ad
5 changed files with 608 additions and 0 deletions

66
demo_userns.c Normal file
View file

@ -0,0 +1,66 @@
/* demo_userns.c
Copyright 2013, Michael Kerrisk
Licensed under GNU General Public License v2 or later
Demonstrate the use of the clone() CLONE_NEWUSER flag.
Link with "-lcap" and make sure that the "libcap-devel" (or
similar) package is installed on the system.
*/
#define _GNU_SOURCE
#include <sys/capability.h>
#include <sys/wait.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Print 'msg' followed by the error text for the current 'errno' (via
   perror()) and terminate the calling process with EXIT_FAILURE.
   The do { ... } while (0) wrapper makes the macro expand to a single
   statement, so it is safe as the body of an unbraced 'if'. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Startup function for the cloned child.

   Prints the effective user ID, effective group ID and capability set of
   the calling process. When 'arg' is NULL the information is printed once
   and the function returns; otherwise it is printed again every 5 seconds
   without end.

   Returns 0 (only reachable when 'arg' is NULL). Terminates the process
   via errExit() if the capability set cannot be obtained or converted
   to text. */
static int
childFunc(void *arg)
{
    for (;;) {
        cap_t caps;
        char *caps_text;

        printf("eUID = %ld; eGID = %ld; ",
               (long) geteuid(), (long) getegid());

        caps = cap_get_proc();
        if (caps == NULL)
            errExit("cap_get_proc");

        caps_text = cap_to_text(caps, NULL);
        if (caps_text == NULL)
            errExit("cap_to_text");

        printf("capabilities: %s\n", caps_text);

        /* Both the capability state and its textual form are allocated
           by libcap and must be released with cap_free(); without this
           the loop leaks on every pass. */
        cap_free(caps_text);
        cap_free(caps);

        if (arg == NULL)
            break;

        sleep(5);
    }

    return 0;
}
/* Size, in bytes, of the stack given to the cloned child (1 MiB) */
#define STACK_SIZE (1024 * 1024)
/* Statically allocated buffer; main() passes its end address
   (child_stack + STACK_SIZE) to clone() as the top of the stack */
static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Create a child in a new user namespace and wait for it to terminate.

   The child starts in childFunc() and is handed argv[1] as its argument,
   which is NULL when the program is run without arguments. */
int
main(int argc, char *argv[])
{
    /* The stack is assumed to grow downward, so the child's initial stack
       pointer is the address just past the end of the buffer. */
    char *stack_top = child_stack + STACK_SIZE;
    pid_t child;

    child = clone(childFunc, stack_top, CLONE_NEWUSER | SIGCHLD, argv[1]);
    if (child == -1)
        errExit("clone");

    /* Only the parent reaches this point; block until the child is done */
    if (waitpid(child, NULL, 0) == -1)
        errExit("waitpid");

    exit(EXIT_SUCCESS);
}

97
ns_child_exec.c Normal file
View file

@ -0,0 +1,97 @@
/* ns_child_exec.c
Copyright 2013, Michael Kerrisk
Licensed under GNU General Public License v2 or later
Create a child process that executes a shell command in new namespace(s).
*/
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
/* A simple error-handling macro: print 'msg' followed by the error text
   for the current value of 'errno' (via perror()) and terminate the
   calling process with EXIT_FAILURE. The do { ... } while (0) wrapper
   makes it expand to a single statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Write the command-line synopsis and the list of supported options to
   stderr, then terminate with EXIT_FAILURE. 'pname' is the program name
   shown in the synopsis. Does not return. */
static void
usage(char *pname)
{
    /* One call with adjacent string literals; only the first line
       contains a conversion specification. */
    fprintf(stderr,
            "Usage: %s [options] cmd [arg...]\n"
            "Options can be:\n"
            " -i new IPC namespace\n"
            " -m new mount namespace\n"
            " -n new network namespace\n"
            " -p new PID namespace\n"
            " -u new UTS namespace\n"
            " -U new user namespace\n"
            " -v Display verbose messages\n",
            pname);
    exit(EXIT_FAILURE);
}
/* Start function for the cloned child.

   'arg' is the argument vector of the command to run (main() passes
   &argv[optind]). The child image is replaced by that command; if
   execvp() fails, errExit() reports the error and terminates, so this
   function never returns. */
static int
childFunc(void *arg)
{
    char **cmd = (char **) arg;

    execvp(cmd[0], cmd);
    errExit("execvp");
}
/* Size, in bytes, of the stack given to the cloned child (1 MiB) */
#define STACK_SIZE (1024 * 1024)
/* Statically allocated buffer; main() passes its end address
   (child_stack + STACK_SIZE) to clone() as the top of the stack */
static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Run a command in a child created with clone() in the namespaces
   selected on the command line, and wait for that child.

   Options -i, -m, -n, -p, -u and -U each add one CLONE_NEW* flag;
   -v enables progress messages on stdout. Everything after the options
   is the command (and its arguments) executed by the child. */
int
main(int argc, char *argv[])
{
    int flags = 0;
    int verbose = 0;
    int opt;
    pid_t child_pid;

    /* Parse command-line options. The initial '+' character in
       the final getopt() argument prevents GNU-style permutation
       of command-line options. That's useful, since sometimes
       the 'command' to be executed by this program itself
       has command-line options. We don't want getopt() to treat
       those as options to this program. */
    while ((opt = getopt(argc, argv, "+imnpuUv")) != -1) {
        switch (opt) {
        case 'i': flags |= CLONE_NEWIPC;  break;
        case 'm': flags |= CLONE_NEWNS;   break;
        case 'n': flags |= CLONE_NEWNET;  break;
        case 'p': flags |= CLONE_NEWPID;  break;
        case 'u': flags |= CLONE_NEWUTS;  break;
        case 'U': flags |= CLONE_NEWUSER; break;
        case 'v': verbose = 1;            break;
        default:  usage(argv[0]);
        }
    }

    /* A command is mandatory. Without this check &argv[optind] would be
       an empty vector and the child would call execvp() with a NULL
       file name. */
    if (optind >= argc)
        usage(argv[0]);

    child_pid = clone(childFunc,
                      child_stack + STACK_SIZE,
                      flags | SIGCHLD, &argv[optind]);
    if (child_pid == -1)
        errExit("clone");

    /* Parent falls through to here */
    if (verbose)
        printf("%s: PID of child created by clone() is %ld\n",
               argv[0], (long) child_pid);

    if (waitpid(child_pid, NULL, 0) == -1)      /* Wait for child */
        errExit("waitpid");

    if (verbose)
        printf("%s: terminating\n", argv[0]);

    exit(EXIT_SUCCESS);
}

31
setns.c Normal file
View file

@ -0,0 +1,31 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
/* Print 'msg' followed by the error text for the current 'errno' (via
   perror()) and terminate the calling process with EXIT_FAILURE.
   The do { ... } while (0) wrapper makes the macro expand to a single
   statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Join the namespace referred to by argv[1] (a /proc/PID/ns/FILE path)
   and then execute the command given by argv[2] and the arguments that
   follow it.

   Exits with EXIT_FAILURE after printing a usage line if fewer than two
   arguments are supplied, or after a perror() message if open(), setns()
   or execvp() fails. */
int
main(int argc, char *argv[])
{
    int fd;

    if (argc < 3) {
        fprintf(stderr, "%s /proc/PID/ns/FILE cmd args...\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    fd = open(argv[1], O_RDONLY);       /* Get descriptor for namespace */
    if (fd == -1)
        errExit("open");

    /* An nstype of 0 lets the descriptor refer to any type of namespace */
    if (setns(fd, 0) == -1)             /* Join that namespace */
        errExit("setns");

    /* The descriptor is no longer needed once the namespace has been
       joined; close it so that it is not inherited by the command. */
    close(fd);

    execvp(argv[2], &argv[2]);          /* Execute a command in namespace */
    errExit("execvp");
}

188
simple_init.c Normal file
View file

@ -0,0 +1,188 @@
/* simple_init.c
Copyright 2013, Michael Kerrisk
Licensed under GNU General Public License v2 or later
A simple init(1)-style program to be used as the init program in
a PID namespace. The program reaps the status of its children and
provides a simple shell facility for executing commands.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <wordexp.h>
#include <errno.h>
#include <sys/wait.h>
/* Print 'msg' followed by the error text for the current 'errno' (via
   perror()) and terminate the calling process with EXIT_FAILURE.
   The do { ... } while (0) wrapper makes the macro expand to a single
   statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Nonzero enables the "\tinit: ..." progress messages; set to 1 by the
   -v option in main() and read by the SIGCHLD handler */
static int verbose = 0;
/* SIGCHLD handler: reap child processes as they change state.

   Collects every child whose status is currently available without
   blocking. 'sig' is unused. The caller's 'errno' is preserved so that
   code interrupted between a failing call and its error report is not
   affected by the waitpid() calls made here.

   NOTE: printf() and perror() are not async-signal-safe; they are used
   here only for diagnostic output. The verbose message says
   "terminated" even though WUNTRACED and WCONTINUED also report stopped
   and continued children. */
static void
child_handler(int sig)
{
    int saved_errno = errno;
    pid_t pid;
    int status;

    (void) sig;

    /* WUNTRACED and WCONTINUED allow waitpid() to catch stopped and
       continued children (in addition to terminated children) */
    while ((pid = waitpid(-1, &status,
                          WNOHANG | WUNTRACED | WCONTINUED)) != 0) {
        if (pid == -1) {
            if (errno != ECHILD)        /* ECHILD: no more children */
                perror("waitpid");      /* Unexpected error */

            /* Stop on any failure: retrying an unexpected error could
               spin forever, and -1 is not a PID worth reporting. */
            break;
        }

        if (verbose)
            printf("\tinit: SIGCHLD handler: PID %ld terminated\n",
                   (long) pid);
    }

    errno = saved_errno;
}
/* Perform word expansion on the string 'cmd' using wordexp().

   On success returns a newly allocated, NULL-terminated vector whose
   entries point at the words produced by the expansion. If the expansion
   fails, prints a message on stderr and returns NULL. If the vector
   cannot be allocated, the process is terminated via errExit(). */
static char **
expand_words(char *cmd)
{
    wordexp_t expansion;
    char **words;
    size_t nwords, i;

    if (wordexp(cmd, &expansion, 0) != 0) {
        fprintf(stderr, "Word expansion failed\n");
        return NULL;
    }

    nwords = expansion.we_wordc;

    /* One extra slot for the terminating NULL pointer */
    words = calloc(nwords + 1, sizeof(char *));
    if (words == NULL)
        errExit("calloc");

    for (i = 0; i < nwords; i++)
        words[i] = expansion.we_wordv[i];
    words[nwords] = NULL;

    return words;
}
/* Write the command-line synopsis to stderr and terminate with
   EXIT_FAILURE. 'pname' is the program name shown in the synopsis.
   Does not return. */
static void
usage(char *pname)
{
    /* The only option accepted by main() is -v, so advertise that one */
    fprintf(stderr, "Usage: %s [-v]\n", pname);
    fprintf(stderr, "\t-v\tProvide verbose logging\n");
    exit(EXIT_FAILURE);
}
int
main(int argc, char *argv[])
{
struct sigaction sa;
#define CMD_SIZE 10000
char cmd[CMD_SIZE];
pid_t pid;
int opt;
while ((opt = getopt(argc, argv, "v")) != -1) {
switch (opt) {
case 'v': verbose = 1; break;
default: usage(argv[0]);
}
}
sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;
sigemptyset(&sa.sa_mask);
sa.sa_handler = child_handler;
if (sigaction(SIGCHLD, &sa, NULL) == -1)
errExit("sigaction");
if (verbose)
printf("\tinit: my PID is %ld\n", (long) getpid());
/* Performing terminal operations while not being the foreground
process group for the terminal generates a SIGTTOU that stops the
process. However our init "shell" needs to be able to perform
such operations (just like a normal shell), so we ignore that
signal, which allows the operations to proceed successfully. */
signal(SIGTTOU, SIG_IGN);
/* Become leader of a new process group and make that process
group the foreground process group for the terminal */
if (setpgid(0, 0) == -1)
errExit("setpgid");;
if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1)
errExit("tcsetpgrp-child");
while (1) {
/* Read a shell command; exit on end of file */
printf("init$ ");
if (fgets(cmd, CMD_SIZE, stdin) == NULL) {
if (verbose)
printf("\tinit: exiting");
printf("\n");
exit(EXIT_SUCCESS);
}
if (cmd[strlen(cmd) - 1] == '\n')
cmd[strlen(cmd) - 1] = '\0'; /* Strip trailing '\n' */
if (strlen(cmd) == 0)
continue; /* Ignore empty commands */
pid = fork(); /* Create child process */
if (pid == -1)
errExit("fork");
if (pid == 0) { /* Child */
char **arg_vec;
arg_vec = expand_words(cmd);
if (arg_vec == NULL) /* Word expansion failed */
continue;
/* Make child the leader of a new process group and
make that process group the foreground process
group for the terminal */
if (setpgid(0, 0) == -1)
errExit("setpgid");;
if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1)
errExit("tcsetpgrp-child");
/* Child executes shell command and terminates */
execvp(arg_vec[0], arg_vec);
errExit("execvp"); /* Only reached if execvp() fails */
}
/* Parent falls through to here */
if (verbose)
printf("\tinit: created child %ld\n", (long) pid);
pause(); /* Will be interrupted by signal handler */
/* After child changes state, ensure that the 'init' program
is the foreground process group for the terminal */
if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1)
errExit("tcsetpgrp-parent");
}
}

226
userns_child_exec.c Normal file
View file

@ -0,0 +1,226 @@
/* userns_child_exec.c
Copyright 2013, Michael Kerrisk
Licensed under GNU General Public License v2 or later
Create a child process that executes a shell command in new
namespace(s); allow UID and GID mappings to be specified when
creating a user namespace.
*/
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
/* A simple error-handling macro: print 'msg' followed by the error text
   for the current value of 'errno' (via perror()) and terminate the
   calling process with EXIT_FAILURE. The do { ... } while (0) wrapper
   makes it expand to a single statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Arguments passed from main() to childFunc() through clone() */
struct child_args {
char **argv; /* Command to be executed by child, with arguments */
int pipe_fd[2]; /* Pipe used to synchronize parent and child */
};
/* Nonzero when -v was given; set in main() and used there to enable
   progress messages */
static int verbose;
/* Write the command-line synopsis, a short description and the option
   help to stderr, then terminate with EXIT_FAILURE. 'pname' is the
   program name shown in the synopsis. Does not return. */
static void
usage(char *pname)
{
    /* Help lines, each written with a one-space prefix. Kept in a table
       instead of a statement-like macro with an embedded semicolon. */
    static const char *const option_help[] = {
        "-i New IPC namespace\n",
        "-m New mount namespace\n",
        "-n New network namespace\n",
        "-p New PID namespace\n",
        "-u New UTS namespace\n",
        "-U New user namespace\n",
        "-M uid_map Specify UID map for user namespace\n",
        "-G gid_map Specify GID map for user namespace\n",
        " If -M or -G is specified, -U is required\n",
        "-v Display verbose messages\n",
        "\n",
        "Map strings for -M and -G consist of records of the form:\n",
        "\n",
        " ID-inside-ns ID-outside-ns len\n",
        "\n",
        "A map string can contain multiple records, separated by commas;\n",
        "the commas are replaced by newlines before writing to map files.\n",
    };
    size_t i;

    fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname);
    fprintf(stderr, "Create a child process that executes a shell command "
            "in a new user namespace,\n"
            "and possibly also other new namespace(s).\n\n");
    fprintf(stderr, "Options can be:\n\n");

    for (i = 0; i < sizeof option_help / sizeof option_help[0]; i++)
        fprintf(stderr, " %s", option_help[i]);

    exit(EXIT_FAILURE);
}
/* Update the mapping file 'map_file', with the value provided in
   'mapping', a string that defines a UID or GID mapping. A UID or
   GID mapping consists of one or more newline-delimited records
   of the form:

       ID_inside-ns ID-outside-ns length

   Requiring the user to supply a string that contains newlines is
   of course inconvenient for command-line use. Thus, we permit the
   use of commas to delimit records in this string, and replace them
   with newlines before writing the string to the file.

   'mapping' is modified in place. The whole string is written with a
   single write() call. On any failure (open, write error, or short
   write) a message is printed on stderr and the process exits with
   EXIT_FAILURE. */
static void
update_map(char *mapping, char *map_file)
{
    int fd;
    size_t map_len;             /* Length of 'mapping' */
    size_t j;
    ssize_t written;

    /* Replace commas in mapping string with newlines */
    map_len = strlen(mapping);
    for (j = 0; j < map_len; j++)
        if (mapping[j] == ',')
            mapping[j] = '\n';

    fd = open(map_file, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    written = write(fd, mapping, map_len);
    if (written == -1) {
        fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* A short write does not set errno, so report it separately rather
       than printing whatever error text happens to be current. */
    if ((size_t) written != map_len) {
        fprintf(stderr, "write %s: short write (%zd of %zu bytes)\n",
                map_file, written, map_len);
        exit(EXIT_FAILURE);
    }

    close(fd);
}
/* Start function for the cloned child.

   'arg' points to a struct child_args. The child waits until the parent
   closes the write end of the pipe, then executes the command in
   args->argv. Does not return: it either becomes the command or
   terminates with EXIT_FAILURE. */
static int
childFunc(void *arg)
{
    struct child_args *args = (struct child_args *) arg;
    char ch;

    /* Wait until the parent has updated the UID and GID mappings. See
       the comment in main(). We wait for end of file on a pipe that will
       be closed by the parent process once it has updated the mappings. */

    close(args->pipe_fd[1]);    /* Close our descriptor for the write end
                                   of the pipe so that we see EOF when
                                   parent closes its descriptor */
    if (read(args->pipe_fd[0], &ch, 1) != 0) {
        fprintf(stderr, "Failure in child: read from pipe returned != 0\n");
        exit(EXIT_FAILURE);
    }

    /* The pipe has served its purpose; close the read end so that it is
       not inherited by the command executed below. */
    close(args->pipe_fd[0]);

    /* Execute a shell command */
    execvp(args->argv[0], args->argv);
    errExit("execvp");
}
/* Size, in bytes, of the stack given to the cloned child (1 MiB) */
#define STACK_SIZE (1024 * 1024)
/* Statically allocated buffer; main() passes its end address
   (child_stack + STACK_SIZE) to clone() as the top of the stack */
static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Run a command in a child created with clone() in the namespaces
   selected on the command line, optionally writing UID and GID maps for
   a new user namespace before the child executes the command, and wait
   for the child.

   Options -i, -m, -n, -p, -u and -U each add one CLONE_NEW* flag;
   -M and -G supply the UID and GID map strings (and require -U);
   -v enables progress messages. Everything after the options is the
   command (and its arguments) executed by the child. */
int
main(int argc, char *argv[])
{
    int flags, opt;
    pid_t child_pid;
    struct child_args args;
    char *uid_map, *gid_map;
    char map_path[PATH_MAX];

    /* Parse command-line options. The initial '+' character in
       the final getopt() argument prevents GNU-style permutation
       of command-line options. That's useful, since sometimes
       the 'command' to be executed by this program itself
       has command-line options. We don't want getopt() to treat
       those as options to this program. */
    flags = 0;
    verbose = 0;
    gid_map = NULL;
    uid_map = NULL;
    while ((opt = getopt(argc, argv, "+imnpuUM:G:v")) != -1) {
        switch (opt) {
        case 'i': flags |= CLONE_NEWIPC;  break;
        case 'm': flags |= CLONE_NEWNS;   break;
        case 'n': flags |= CLONE_NEWNET;  break;
        case 'p': flags |= CLONE_NEWPID;  break;
        case 'u': flags |= CLONE_NEWUTS;  break;
        case 'v': verbose = 1;            break;
        case 'M': uid_map = optarg;       break;
        case 'G': gid_map = optarg;       break;
        case 'U': flags |= CLONE_NEWUSER; break;
        default:  usage(argv[0]);
        }
    }

    /* -M or -G without -U is nonsensical */
    if ((uid_map != NULL || gid_map != NULL) &&
        !(flags & CLONE_NEWUSER))
        usage(argv[0]);

    /* A command is mandatory. Without this check the child would call
       execvp() with a NULL file name. */
    if (optind >= argc)
        usage(argv[0]);

    args.argv = &argv[optind];

    /* We use a pipe to synchronize the parent and child, in order to
       ensure that the parent sets the UID and GID maps before the child
       calls execve(). This ensures that the child maintains its
       capabilities during the execve() in the common case where we
       want to map the child's effective user ID to 0 in the new user
       namespace. Without this synchronization, the child would lose
       its capabilities if it performed an execve() with nonzero
       user IDs (see the capabilities(7) man page for details of the
       transformation of a process's capabilities during execve()). */
    if (pipe(args.pipe_fd) == -1)
        errExit("pipe");

    /* Create the child in new namespace(s) */
    child_pid = clone(childFunc, child_stack + STACK_SIZE,
                      flags | SIGCHLD, &args);
    if (child_pid == -1)
        errExit("clone");

    /* Parent falls through to here */

    /* Only the child reads from the pipe; the parent's copy of the read
       end is not needed. */
    close(args.pipe_fd[0]);

    if (verbose)
        printf("%s: PID of child created by clone() is %ld\n",
               argv[0], (long) child_pid);

    /* Update the UID and GID maps in the child */
    if (uid_map != NULL) {
        snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
                 (long) child_pid);
        update_map(uid_map, map_path);
    }
    if (gid_map != NULL) {
        snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
                 (long) child_pid);
        update_map(gid_map, map_path);
    }

    /* Close the write end of the pipe, to signal to the child that we
       have updated the UID and GID maps */
    close(args.pipe_fd[1]);

    if (waitpid(child_pid, NULL, 0) == -1)      /* Wait for child */
        errExit("waitpid");

    if (verbose)
        printf("%s: terminating\n", argv[0]);

    exit(EXIT_SUCCESS);
}