update main to use vendor

Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
This commit is contained in:
Jess Frazelle 2018-03-19 21:44:18 -04:00
parent 639756e8c6
commit e346c2e0ba
9 changed files with 1102 additions and 1003 deletions

99
main.go
View file

@ -5,16 +5,17 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"github.com/Sirupsen/logrus"
aaprofile "github.com/docker/docker/profiles/apparmor"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/apparmor"
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runc/libcontainer/specconv"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
const (
@ -40,15 +41,14 @@ const (
)
var (
console = os.Getenv("console")
containerID string
pidFile string
root string
allocateTty bool
detach bool
readonly bool
useSystemdCgroup bool
allocateTty bool
consoleSocket string
detach bool
readonly bool
hooks specs.Hooks
hookflags stringSlice
@ -113,16 +113,14 @@ func (s stringSlice) ParseHooks() (hooks specs.Hooks, err error) {
func init() {
// Parse flags
flag.StringVar(&containerID, "id", IMAGE, "container ID")
flag.StringVar(&console, "console", console, "the pty slave path for use with the container")
flag.StringVar(&pidFile, "pid-file", "", "specify the file to write the process id to")
flag.StringVar(&root, "root", defaultRoot, "root directory of container state, should be tmpfs")
flag.Var(&hookflags, "hook", "Hooks to prefill into spec file. (ex. --hook prestart:netns)")
flag.BoolVar(&allocateTty, "t", true, "allocate a tty for the container")
flag.StringVar(&consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
flag.BoolVar(&detach, "d", false, "detach from the container's process")
// TODO (jess): do not enable this flag, the error is very gross on systemd
// flag.BoolVar(&useSystemdCgroup, "systemd-cgroup", false, "enable systemd cgroup support")
flag.BoolVar(&readonly, "read-only", false, "make container filesystem readonly")
flag.BoolVar(&version, "version", false, "print version and exit")
@ -141,17 +139,33 @@ func init() {
os.Exit(0)
}
// Set log level
// Set log level.
if debug {
logrus.SetLevel(logrus.DebugLevel)
}
// parse the hook flags
// Parse the hook flags.
var err error
hooks, err = hookflags.ParseHooks()
if err != nil {
logrus.Fatal(err)
}
// Convert pid-file to an absolute path so we can write to the
// right file after chdir to bundle.
if pidFile != "" {
pidFile, err = filepath.Abs(pidFile)
if err != nil {
logrus.Fatal(err)
}
}
// Get the absolute path to the root.
root, err = filepath.Abs(root)
if err != nil {
logrus.Fatal(err)
}
}
//go:generate go run generate.go
@ -161,28 +175,27 @@ func main() {
return
}
notifySocket := os.Getenv("NOTIFY_SOCKET")
if notifySocket != "" {
setupSdNotify(spec, notifySocket)
}
// Initialize the spec.
spec := specconv.Example()
// override the cmd in the spec with any args specified
if len(flag.Args()) > 0 {
spec.Process.Args = flag.Args()
}
// Set the spec to be rootless.
specconv.ToRootless(spec)
// setup readonly fs in spec
// Setup readonly fs in spec.
spec.Root.Readonly = readonly
// setup tty in spec
// Setup tty in spec.
spec.Process.Terminal = allocateTty
// pass in any hooks
spec.Hooks = hooks
// Pass in any hooks to the spec.
spec.Hooks = &hooks
// install the default apparmor profile
// Set the default seccomp profile.
spec.Linux.Seccomp = defaultSeccompProfile
// Install the default apparmor profile.
if apparmor.IsEnabled() {
// check if we have the docker-default apparmor profile loaded
// Check if we have the docker-default apparmor profile loaded.
if _, err := aaprofile.IsLoaded(defaultApparmorProfile); err != nil {
logrus.Warnf("AppArmor enabled on system but the %s profile is not loaded. apparmor_parser needs root to load a profile so we can't do it for you.", defaultApparmorProfile)
} else {
@ -190,45 +203,23 @@ func main() {
}
}
// set the CgroupsPath as this user
u, err := user.CurrentUser()
if err != nil {
logrus.Fatal(err)
}
spec.Linux.CgroupsPath = sPtr(u.Name)
// setup UID mappings
spec.Linux.UIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Uid),
ContainerID: 0,
Size: 1,
},
}
// setup GID mappings
spec.Linux.GIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Gid),
ContainerID: 0,
Size: 1,
},
}
// Unpack the rootfs.
if err := unpackRootfs(spec); err != nil {
logrus.Fatal(err)
}
status, err := startContainer(spec, containerID, pidFile, detach, useSystemdCgroup)
// Start the container.
status, err := startContainer(spec, containerID, pidFile, consoleSocket, root, detach)
if err != nil {
logrus.Fatal(err)
}
// Remove the rootfs after the container has exited.
if err := os.RemoveAll(defaultRootfsDir); err != nil {
logrus.Warnf("removing rootfs failed: %v", err)
}
// exit with the container's exit status
// Exit with the container's exit status.
os.Exit(status)
}

106
notify_socket.go Normal file
View file

@ -0,0 +1,106 @@
package main
import (
"bytes"
"fmt"
"net"
"os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// notifySocket describes the sd_notify relay between a container and the
// host: socket is the listening end created by setupSocket, host is the value
// of NOTIFY_SOCKET in this process's environment, and socketPath is where the
// relay socket lives under the container's state directory.
type notifySocket struct {
	socket     *net.UnixConn
	host       string
	socketPath string
}

// newNotifySocket builds the notify socket description for container id whose
// state lives under root. It returns nil when NOTIFY_SOCKET is unset or empty.
// The socket itself is not opened here; the socket field is left nil.
func newNotifySocket(id, root string) *notifySocket {
	host := os.Getenv("NOTIFY_SOCKET")
	if host == "" {
		// Nothing to relay to without a NOTIFY_SOCKET.
		return nil
	}

	stateDir := filepath.Join(root, id)
	return &notifySocket{
		host:       host,
		socketPath: filepath.Join(stateDir, "notify.sock"),
	}
}
// Close closes the listening notify socket and returns the error from the
// underlying connection. The socket field stays nil until setupSocket has
// succeeded, so Close is a no-op returning nil when there is nothing to close.
func (s *notifySocket) Close() error {
	if s == nil || s.socket == nil {
		return nil
	}
	return s.socket.Close()
}
// setupSpec wires sd_notify support into spec so the protocol can be used
// from within the container: it appends a bind mount that exposes
// s.socketPath at the destination s.host, and appends NOTIFY_SOCKET=<s.host>
// to the process environment.
func (s *notifySocket) setupSpec(spec *specs.Spec) {
	spec.Mounts = append(spec.Mounts, specs.Mount{
		Destination: s.host,
		Type:        "bind",
		Source:      s.socketPath,
		Options:     []string{"bind"},
	})

	notifyEnv := "NOTIFY_SOCKET=" + s.host
	spec.Process.Env = append(spec.Process.Env, notifyEnv)
}
// setupSocket starts listening on a unixgram socket at s.socketPath and
// stores the resulting connection in s.socket. The listen error, if any, is
// returned unchanged and s.socket is left untouched.
func (s *notifySocket) setupSocket() error {
	listenAddr := &net.UnixAddr{Name: s.socketPath, Net: "unixgram"}

	conn, err := net.ListenUnixgram("unixgram", listenAddr)
	if err != nil {
		return err
	}

	s.socket = conn
	return nil
}
// run relays the container's readiness notification to the host's notify
// socket.
//
// It dials s.host, then reads datagrams from s.socket until one contains a
// line starting with "READY=". That line, followed by a newline, is written to
// the host socket; if pid1 is greater than zero a "MAINPID=<pid1>" message is
// sent afterwards. run returns after the first READY line has been forwarded,
// or as soon as dialing, reading or forwarding fails.
//
// pid1 must be set only with -d, as it is used to set the new process as the
// main process for the service in systemd.
func (s *notifySocket) run(pid1 int) {
	hostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
	client, err := net.DialUnix("unixgram", nil, &hostAddr)
	if err != nil {
		logrus.Error(err)
		return
	}
	// Release the host-side connection on every exit path; it used to be
	// left open after the relay finished.
	defer client.Close()

	buf := make([]byte, 512)
	for {
		n, err := s.socket.Read(buf)
		if err != nil {
			// A failed read ends the relay without reporting an error,
			// as before.
			return
		}

		for _, line := range bytes.Split(buf[:n], []byte{'\n'}) {
			if !bytes.HasPrefix(line, []byte("READY=")) {
				continue
			}

			// Build the message in its own slice so the read buffer that
			// line points into is not modified.
			msg := make([]byte, 0, len(line)+1)
			msg = append(msg, line...)
			msg = append(msg, '\n')
			if _, err := client.Write(msg); err != nil {
				logrus.Error(err)
				return
			}

			// Now we can inform systemd to use pid1 as the pid to monitor.
			if pid1 > 0 {
				mainPid := fmt.Sprintf("MAINPID=%d\n", pid1)
				if _, err := client.Write([]byte(mainPid)); err != nil {
					logrus.Error(err)
				}
			}
			return
		}
	}
}

View file

@ -43,7 +43,7 @@ var rlimitMap = map[string]int{
func strToRlimit(key string) (int, error) {
rl, ok := rlimitMap[key]
if !ok {
return 0, fmt.Errorf("Wrong rlimit value: %s", key)
return 0, fmt.Errorf("wrong rlimit value: %s", key)
}
return rl, nil
}

View file

@ -22,7 +22,7 @@ func unpackRootfs(spec *specs.Spec) error {
}
r := bytes.NewReader(data)
if err := archive.Untar(r, defaultRootfsDir, nil); err != nil {
if err := archive.Untar(r, defaultRootfsDir, &archive.TarOptions{NoLchown: true}); err != nil {
return err
}

1272
seccomp.go

File diff suppressed because it is too large Load diff

View file

@ -3,19 +3,23 @@ package main
import (
"os"
"os/signal"
"syscall"
"syscall" // only for Signal
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process.
func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
// If notifySocket is present, use it to read systemd notifications from the container and
// forward them to notifySocketHost.
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler {
if enableSubreaper {
// set us as the subreaper before registering the signal handler for the container
if err := system.SetSubreaper(1); err != nil {
@ -28,8 +32,8 @@ func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
// handle all signals for the process.
signal.Notify(s)
return &signalHandler{
tty: tty,
signals: s,
signals: s,
notifySocket: notifySocket,
}
}
@ -41,26 +45,43 @@ type exit struct {
}
type signalHandler struct {
signals chan os.Signal
tty *tty
signals chan os.Signal
notifySocket *notifySocket
}
// forward handles the main signal event loop forwarding, resizing, or reaping depending
// on the signal received.
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach bool) (int, error) {
// make sure we know the pid of our main process so that we can return
// after it dies.
if detach && h.notifySocket == nil {
return 0, nil
}
pid1, err := process.Pid()
if err != nil {
return -1, err
}
// perform the initial tty resize.
h.tty.resize()
if h.notifySocket != nil {
if detach {
h.notifySocket.run(pid1)
return 0, nil
} else {
go h.notifySocket.run(0)
}
}
// Perform the initial tty resize. Always ignore errors resizing because
// stdout might have disappeared (due to races with when SIGHUP is sent).
_ = tty.resize()
// Handle and forward signals.
for s := range h.signals {
switch s {
case syscall.SIGWINCH:
h.tty.resize()
case syscall.SIGCHLD:
case unix.SIGWINCH:
// Ignore errors resizing, as above.
_ = tty.resize()
case unix.SIGCHLD:
exits, err := h.reap()
if err != nil {
logrus.Error(err)
@ -75,12 +96,15 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
// status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return.
process.Wait()
if h.notifySocket != nil {
h.notifySocket.Close()
}
return e.status, nil
}
}
default:
logrus.Debugf("sending signal to process %s", s)
if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil {
if err := unix.Kill(pid1, s.(syscall.Signal)); err != nil {
logrus.Error(err)
}
}
@ -92,13 +116,13 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
// then returns all exits to the main event loop for further processing.
func (h *signalHandler) reap() (exits []exit, err error) {
var (
ws syscall.WaitStatus
rus syscall.Rusage
ws unix.WaitStatus
rus unix.Rusage
)
for {
pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus)
pid, err := unix.Wait4(-1, &ws, unix.WNOHANG, &rus)
if err != nil {
if err == syscall.ECHILD {
if err == unix.ECHILD {
return exits, nil
}
return nil, err

166
spec.go
View file

@ -1,166 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"os"
"runtime"
"github.com/opencontainers/runtime-spec/specs-go"
)
var (
// spec is the package-level default runtime specification.
//
// It describes a read-only root filesystem at "rootfs", a process that runs
// "sh" on a terminal with NoNewPrivileges set and a fixed capability list,
// the hostname "ctr", a set of mounts under /proc, /dev and /sys, and a Linux
// section with masked/read-only /proc paths, a device cgroup rule, six
// namespaces and defaultSeccompProfile.
spec = &specs.Spec{
Version: specs.Version,
// Platform is taken from the OS and architecture this binary was built for.
Platform: specs.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
Cwd: "/",
NoNewPrivileges: true,
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
// Open-file limit: soft and hard are both 1024.
Rlimits: []specs.Rlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
},
Hostname: "ctr",
Mounts: []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: nil,
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime"},
},
},
Linux: specs.Linux{
MaskedPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_stats",
"/proc/sched_debug",
},
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Resources: &specs.Resources{
// Single device cgroup rule: Allow is false for "rwm" access.
Devices: []specs.DeviceCgroup{
{
Allow: false,
Access: sPtr("rwm"),
},
},
},
Namespaces: []specs.Namespace{
{
Type: "pid",
},
{
Type: "ipc",
},
{
Type: "network",
},
{
Type: "user",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
Seccomp: defaultSeccompProfile,
},
}
)
// loadSpec opens the file at cPath and decodes its JSON content into a
// runtime spec. When the file does not exist the returned error names cPath;
// any other open or decode error is returned unchanged. On failure the
// returned spec is nil.
func loadSpec(cPath string) (spec *specs.Spec, err error) {
	f, openErr := os.Open(cPath)
	switch {
	case openErr == nil:
		// Opened fine; fall through to decoding.
	case os.IsNotExist(openErr):
		return nil, fmt.Errorf("JSON specification file %s not found", cPath)
	default:
		return nil, openErr
	}
	defer f.Close()

	if decodeErr := json.NewDecoder(f).Decode(&spec); decodeErr != nil {
		return nil, decodeErr
	}
	return spec, nil
}

125
tty.go
View file

@ -4,16 +4,34 @@ import (
"fmt"
"io"
"os"
"os/signal"
"sync"
"github.com/docker/docker/pkg/term"
"github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
)
// setup standard pipes so that the TTY of the calling runc process
// is not inherited by the container.
func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
i, err := p.InitializeIO(rootuid)
// tty holds the console and IO state for a container process.
type tty struct {
// epoller is the console epoller created in recvtty; Close passes its
// CloseConsole to the console's Shutdown.
epoller *console.Epoller
// console is the epoll-backed container console registered in recvtty;
// resize applies the current terminal size to it.
console *console.EpollConsole
// stdin is the console wrapping os.Stdin, put into raw mode in recvtty and
// reset in Close.
stdin console.Console
// closers are closed by Close after the copy goroutines have finished.
closers []io.Closer
// postStart holds descriptors handed to the container that are closed
// once it has started (see ClosePostStart) and again in Close.
postStart []io.Closer
// wg tracks the copyIO goroutines; Close waits on it.
wg sync.WaitGroup
// consoleC, when non-nil, delivers the result of recvtty to waitConsole.
consoleC chan error
}
// copyIO copies everything from r to w, closes r once the copy returns, and
// finally marks one unit of work done on t.wg. Copy and close errors are
// discarded.
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
	defer t.wg.Done()

	_, _ = io.Copy(w, r)
	_ = r.Close()
}
// setup pipes for the process so that advanced features like c/r are able to easily checkpoint
// and restore the process's IO without depending on a host specific path or device
func setupProcessPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) {
i, err := p.InitializeIO(rootuid, rootgid)
if err != nil {
return nil, err
}
@ -44,45 +62,66 @@ func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
return t, nil
}
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
defer t.wg.Done()
io.Copy(w, r)
r.Close()
// inheritStdio points the process's stdin, stdout and stderr at this
// program's own standard streams. It always returns nil.
func inheritStdio(process *libcontainer.Process) error {
	process.Stdin, process.Stdout, process.Stderr = os.Stdin, os.Stdout, os.Stderr
	return nil
}
func createTty(p *libcontainer.Process, rootuid int, consolePath string) (*tty, error) {
if consolePath != "" {
if err := p.ConsoleFromPath(consolePath); err != nil {
return nil, err
}
return &tty{}, nil
}
console, err := p.NewConsole(rootuid)
func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) error {
f, err := utils.RecvFd(socket)
if err != nil {
return nil, err
return err
}
go io.Copy(console, os.Stdin)
go io.Copy(os.Stdout, console)
cons, err := console.ConsoleFromFile(f)
if err != nil {
return err
}
console.ClearONLCR(cons.Fd())
epoller, err := console.NewEpoller()
if err != nil {
return err
}
epollConsole, err := epoller.Add(cons)
if err != nil {
return err
}
go epoller.Wait()
go io.Copy(epollConsole, os.Stdin)
t.wg.Add(1)
go t.copyIO(os.Stdout, epollConsole)
state, err := term.SetRawTerminal(os.Stdin.Fd())
// set raw mode to stdin and also handle interrupt
stdin, err := console.ConsoleFromFile(os.Stdin)
if err != nil {
return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err)
return err
}
return &tty{
console: console,
state: state,
closers: []io.Closer{
console,
},
}, nil
if err := stdin.SetRaw(); err != nil {
return fmt.Errorf("failed to set the terminal from the stdin: %v", err)
}
go handleInterrupt(stdin)
t.epoller = epoller
t.stdin = stdin
t.console = epollConsole
t.closers = []io.Closer{epollConsole}
return nil
}
type tty struct {
console libcontainer.Console
state *term.State
closers []io.Closer
postStart []io.Closer
wg sync.WaitGroup
// handleInterrupt blocks until the program receives os.Interrupt, then resets
// the console c and exits the program with status 0. The result of the reset
// is discarded.
func handleInterrupt(c console.Console) {
	interrupts := make(chan os.Signal, 1)
	signal.Notify(interrupts, os.Interrupt)

	<-interrupts

	_ = c.Reset()
	os.Exit(0)
}
// waitConsole blocks for the next value on t.consoleC and returns it. When no
// console channel was set up it returns nil immediately.
func (t *tty) waitConsole() error {
	if t.consoleC == nil {
		return nil
	}
	return <-t.consoleC
}
// ClosePostStart closes any fds that are provided to the container and dup2'd
@ -101,13 +140,17 @@ func (t *tty) Close() error {
for _, c := range t.postStart {
c.Close()
}
// wait for the copy routines to finish before closing the fds
// the process is gone at this point, shutting down the console if we have
// one and wait for all IO to be finished
if t.console != nil && t.epoller != nil {
t.console.Shutdown(t.epoller.CloseConsole)
}
t.wg.Wait()
for _, c := range t.closers {
c.Close()
}
if t.state != nil {
term.RestoreTerminal(os.Stdin.Fd(), t.state)
if t.stdin != nil {
t.stdin.Reset()
}
return nil
}
@ -116,9 +159,5 @@ func (t *tty) resize() error {
if t.console == nil {
return nil
}
ws, err := term.GetWinsize(os.Stdin.Fd())
if err != nil {
return err
}
return term.SetWinsize(t.console.Fd(), ws)
return t.console.ResizeFrom(console.Current())
}

273
utils.go
View file

@ -2,74 +2,91 @@ package main
import (
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"syscall"
"strconv"
"github.com/Sirupsen/logrus"
"github.com/coreos/go-systemd/activation"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runc/libcontainer/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// startContainer starts the container. Returns the exit status or -1 and an
// error. Signals sent to the current process will be forwarded to container.
func startContainer(spec *specs.Spec, id, pidFile string, detach, useSystemdCgroup bool) (int, error) {
// create the libcontainer config
func startContainer(spec *specs.Spec, id, pidFile, consoleSocket, root string, detach bool) (int, error) {
notifySocket := newNotifySocket(id, root)
if notifySocket != nil {
// Setup the spec for the notify socket.
notifySocket.setupSpec(spec)
}
// Create the libcontainer config.
useSystemdCgroup := false
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id,
UseSystemdCgroup: useSystemdCgroup,
NoPivotRoot: false,
NoNewKeyring: false,
Spec: spec,
Rootless: true,
})
if err != nil {
return -1, err
}
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return -1, fmt.Errorf("rootfs (%q) does not exist", config.Rootfs)
}
return -1, err
}
factory, err := loadFactory(useSystemdCgroup)
// Load the factory.
factory, err := loadFactory(root, useSystemdCgroup)
if err != nil {
return -1, err
}
// Create the factory.
container, err := factory.Create(id, config)
if err != nil {
return -1, err
}
// Support on-demand socket activation by passing file descriptors into the container init process.
if notifySocket != nil {
// Setup the socket for the notify socket.
err := notifySocket.setupSocket()
if err != nil {
return -1, err
}
}
// Support on-demand socket activation by passing file descriptors into
// the container init process.
listenFDs := []*os.File{}
if os.Getenv("LISTEN_FDS") != "" {
listenFDs = activation.Files(false)
}
// Initialize the runner.
r := &runner{
enableSubreaper: true,
shouldDestroy: true,
container: container,
console: console,
listenFDs: listenFDs,
notifySocket: notifySocket,
consoleSocket: consoleSocket,
detach: detach,
pidFile: pidFile,
listenFDs: listenFDs,
}
return r.run(&spec.Process)
// Run the process.
return r.run(spec.Process)
}
// loadFactory returns the configured factory instance for execing containers.
func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) {
abs, err := filepath.Abs(root)
if err != nil {
return nil, err
}
func loadFactory(root string, useSystemdCgroup bool) (libcontainer.Factory, error) {
// Setup the cgroups manager. Default is cgroupfs.
cgroupManager := libcontainer.Cgroupfs
if useSystemdCgroup {
if systemd.UseSystemd() {
@ -78,25 +95,61 @@ func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
}
}
return libcontainer.New(abs, cgroupManager, func(l *libcontainer.LinuxFactory) error {
return nil
})
// We resolve the paths for {newuidmap,newgidmap} from the context of runc,
// to avoid doing a path lookup in the nsexec context. TODO: The binary
// names are not currently configurable.
newuidmap, err := exec.LookPath("newuidmap")
if err != nil {
newuidmap = ""
}
newgidmap, err := exec.LookPath("newgidmap")
if err != nil {
newgidmap = ""
}
// Create the new libcontainer factory.
return libcontainer.New(root, cgroupManager, nil, nil,
libcontainer.NewuidmapPath(newuidmap),
libcontainer.NewgidmapPath(newgidmap))
}
// newProcess returns a new libcontainer Process with the arguments from the
// spec and stdio from the current process.
func newProcess(p specs.Process) (*libcontainer.Process, error) {
// Create the libcontainer process.
lp := &libcontainer.Process{
Args: p.Args,
Env: p.Env,
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
Args: p.Args,
Env: p.Env,
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
Cwd: p.Cwd,
Capabilities: p.Capabilities,
Label: p.SelinuxLabel,
NoNewPrivileges: &p.NoNewPrivileges,
AppArmorProfile: p.ApparmorProfile,
}
// Setup the console size.
if p.ConsoleSize != nil {
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}
// Convert the capabilities.
if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
lp.Capabilities.Effective = p.Capabilities.Effective
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
lp.Capabilities.Permitted = p.Capabilities.Permitted
lp.Capabilities.Ambient = p.Capabilities.Ambient
}
// Setup the additional user groups.
for _, gid := range p.User.AdditionalGids {
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
}
// Setup the Rlimits.
for _, rlimit := range p.Rlimits {
rl, err := createLibContainerRlimit(rlimit)
if err != nil {
@ -104,23 +157,8 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
}
lp.Rlimits = append(lp.Rlimits, rl)
}
return lp, nil
}
func dupStdio(process *libcontainer.Process, rootuid int) error {
process.Stdin = os.Stdin
process.Stdout = os.Stdout
process.Stderr = os.Stderr
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stdout.Fd(),
os.Stderr.Fd(),
} {
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
return err
}
}
return nil
return lp, nil
}
func destroy(container libcontainer.Container) {
@ -129,24 +167,55 @@ func destroy(container libcontainer.Container) {
}
}
// setupIO sets the proper IO on the process depending on the configuration
// If there is a nil error then there must be a non nil tty returned
func setupIO(process *libcontainer.Process, rootuid int, console string, createTTY, detach bool) (*tty, error) {
// detach and createTty will not work unless a console path is passed
// so error out here before changing any terminal settings
if createTTY && detach && console == "" {
return nil, fmt.Errorf("cannot allocate tty if runc will detach")
}
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
if createTTY {
return createTty(process, rootuid, console)
process.Stdin = nil
process.Stdout = nil
process.Stderr = nil
t := &tty{}
if !detach {
parent, child, err := utils.NewSockPair("console")
if err != nil {
return nil, err
}
process.ConsoleSocket = child
t.postStart = append(t.postStart, parent, child)
t.consoleC = make(chan error, 1)
go func() {
if err := t.recvtty(process, parent); err != nil {
t.consoleC <- err
}
t.consoleC <- nil
}()
} else {
// the caller of runc will handle receiving the console master
conn, err := net.Dial("unix", sockpath)
if err != nil {
return nil, err
}
uc, ok := conn.(*net.UnixConn)
if !ok {
return nil, fmt.Errorf("casting to UnixConn failed")
}
t.postStart = append(t.postStart, uc)
socket, err := uc.File()
if err != nil {
return nil, err
}
t.postStart = append(t.postStart, socket)
process.ConsoleSocket = socket
}
return t, nil
}
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
// and the container's process inherits runc's stdio.
if detach {
if err := dupStdio(process, rootuid); err != nil {
if err := inheritStdio(process); err != nil {
return nil, err
}
return &tty{}, nil
}
return createStdioPipes(process, rootuid)
return setupProcessPipes(process, rootuid, rootgid)
}
// createPidFile creates a file with the processes pid inside it atomically
@ -175,46 +244,86 @@ func createPidFile(path string, process *libcontainer.Process) error {
type runner struct {
enableSubreaper bool
shouldDestroy bool
detach bool
listenFDs []*os.File
shouldDestroy bool
consoleSocket string
pidFile string
console string
container libcontainer.Container
listenFDs []*os.File
notifySocket *notifySocket
}
func (r *runner) run(config *specs.Process) (int, error) {
// Check the terminal settings.
if r.detach && config.Terminal && r.consoleSocket == "" {
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
}
if (!r.detach || !config.Terminal) && r.consoleSocket != "" {
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
}
// Create the process.
process, err := newProcess(*config)
if err != nil {
r.destroy()
return -1, err
}
// Setup the listen file descriptors.
if len(r.listenFDs) > 0 {
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
}
rootuid, err := r.container.Config().HostUID()
// Get the rootuid.
rootuid, err := r.container.Config().HostRootUID()
if err != nil {
r.destroy()
return -1, err
}
tty, err := setupIO(process, rootuid, r.console, config.Terminal, r.detach)
// Get the rootgid.
rootgid, err := r.container.Config().HostRootGID()
if err != nil {
r.destroy()
return -1, err
}
handler := newSignalHandler(tty, r.enableSubreaper)
if err := r.container.Start(process); err != nil {
// Setting up IO is a two stage process. We need to modify process to deal
// with detaching containers, and then we get a tty after the container has
// started.
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, r.detach, r.consoleSocket)
if err != nil {
r.destroy()
return -1, err
}
defer tty.Close()
// Run the container.
if err := r.container.Run(process); err != nil {
r.destroy()
tty.Close()
return -1, err
}
if err := tty.ClosePostStart(); err != nil {
// Wait for the tty.
if err := tty.waitConsole(); err != nil {
r.terminate(process)
r.destroy()
tty.Close()
return -1, err
}
// Close after start the tty.
if err = tty.ClosePostStart(); err != nil {
r.terminate(process)
r.destroy()
tty.Close()
return -1, err
}
// Create the pid file.
if r.pidFile != "" {
if err := createPidFile(r.pidFile, process); err != nil {
r.terminate(process)
@ -223,16 +332,21 @@ func (r *runner) run(config *specs.Process) (int, error) {
return -1, err
}
}
if r.detach {
tty.Close()
return 0, nil
}
status, err := handler.forward(process)
// Forward the handler.
status, err := handler.forward(process, tty, detach)
if err != nil {
r.terminate(process)
}
// Return early if we are detaching.
if r.detach {
return 0, nil
}
// Cleanup.
r.destroy()
tty.Close()
return status, err
}
@ -243,27 +357,18 @@ func (r *runner) destroy() {
}
func (r *runner) terminate(p *libcontainer.Process) {
p.Signal(syscall.SIGKILL)
p.Wait()
_ = p.Signal(unix.SIGKILL)
_, _ = p.Wait()
}
func sPtr(s string) *string { return &s }
func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
func createLibContainerRlimit(rlimit specs.POSIXRlimit) (configs.Rlimit, error) {
rl, err := strToRlimit(rlimit.Type)
if err != nil {
return configs.Rlimit{}, err
}
return configs.Rlimit{
Type: rl,
Hard: uint64(rlimit.Hard),
Soft: uint64(rlimit.Soft),
Hard: rlimit.Hard,
Soft: rlimit.Soft,
}, nil
}
// setupSdNotify adds sd_notify support to spec so the protocol can be used
// from within the container: it appends a bind mount of notifySocket onto the
// same path, and appends NOTIFY_SOCKET=<notifySocket> to the process
// environment.
func setupSdNotify(spec *specs.Spec, notifySocket string) {
	bindMount := specs.Mount{
		Destination: notifySocket,
		Type:        "bind",
		Source:      notifySocket,
		Options:     []string{"bind"},
	}
	spec.Mounts = append(spec.Mounts, bindMount)

	spec.Process.Env = append(spec.Process.Env, "NOTIFY_SOCKET="+notifySocket)
}