update main to use vendor

Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
This commit is contained in:
Jess Frazelle 2018-03-19 21:44:18 -04:00
parent 639756e8c6
commit e346c2e0ba
9 changed files with 1102 additions and 1003 deletions

93
main.go
View file

@ -5,16 +5,17 @@ import (
"fmt" "fmt"
"os" "os"
"os/exec" "os/exec"
"path/filepath"
"runtime" "runtime"
"strings" "strings"
"github.com/Sirupsen/logrus"
aaprofile "github.com/docker/docker/profiles/apparmor" aaprofile "github.com/docker/docker/profiles/apparmor"
"github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/apparmor"
_ "github.com/opencontainers/runc/libcontainer/nsenter" _ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go" specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
) )
const ( const (
@ -40,15 +41,14 @@ const (
) )
var ( var (
console = os.Getenv("console")
containerID string containerID string
pidFile string pidFile string
root string root string
allocateTty bool allocateTty bool
consoleSocket string
detach bool detach bool
readonly bool readonly bool
useSystemdCgroup bool
hooks specs.Hooks hooks specs.Hooks
hookflags stringSlice hookflags stringSlice
@ -113,16 +113,14 @@ func (s stringSlice) ParseHooks() (hooks specs.Hooks, err error) {
func init() { func init() {
// Parse flags // Parse flags
flag.StringVar(&containerID, "id", IMAGE, "container ID") flag.StringVar(&containerID, "id", IMAGE, "container ID")
flag.StringVar(&console, "console", console, "the pty slave path for use with the container")
flag.StringVar(&pidFile, "pid-file", "", "specify the file to write the process id to") flag.StringVar(&pidFile, "pid-file", "", "specify the file to write the process id to")
flag.StringVar(&root, "root", defaultRoot, "root directory of container state, should be tmpfs") flag.StringVar(&root, "root", defaultRoot, "root directory of container state, should be tmpfs")
flag.Var(&hookflags, "hook", "Hooks to prefill into spec file. (ex. --hook prestart:netns)") flag.Var(&hookflags, "hook", "Hooks to prefill into spec file. (ex. --hook prestart:netns)")
flag.BoolVar(&allocateTty, "t", true, "allocate a tty for the container") flag.BoolVar(&allocateTty, "t", true, "allocate a tty for the container")
flag.StringVar(&consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
flag.BoolVar(&detach, "d", false, "detach from the container's process") flag.BoolVar(&detach, "d", false, "detach from the container's process")
// TODO (jess): do not enable this flag, the error is very gross on systemd
// flag.BoolVar(&useSystemdCgroup, "systemd-cgroup", false, "enable systemd cgroup support")
flag.BoolVar(&readonly, "read-only", false, "make container filesystem readonly") flag.BoolVar(&readonly, "read-only", false, "make container filesystem readonly")
flag.BoolVar(&version, "version", false, "print version and exit") flag.BoolVar(&version, "version", false, "print version and exit")
@ -141,17 +139,33 @@ func init() {
os.Exit(0) os.Exit(0)
} }
// Set log level // Set log level.
if debug { if debug {
logrus.SetLevel(logrus.DebugLevel) logrus.SetLevel(logrus.DebugLevel)
} }
// parse the hook flags // Parse the hook flags.
var err error var err error
hooks, err = hookflags.ParseHooks() hooks, err = hookflags.ParseHooks()
if err != nil { if err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
// Convert pid-file to an absolute path so we can write to the
// right file after chdir to bundle.
if pidFile != "" {
pidFile, err = filepath.Abs(pidFile)
if err != nil {
logrus.Fatal(err)
}
}
// Get the absolute path to the root.
root, err = filepath.Abs(root)
if err != nil {
logrus.Fatal(err)
}
} }
//go:generate go run generate.go //go:generate go run generate.go
@ -161,28 +175,27 @@ func main() {
return return
} }
notifySocket := os.Getenv("NOTIFY_SOCKET") // Initialize the spec.
if notifySocket != "" { spec := specconv.Example()
setupSdNotify(spec, notifySocket)
}
// override the cmd in the spec with any args specified // Set the spec to be rootless.
if len(flag.Args()) > 0 { specconv.ToRootless(spec)
spec.Process.Args = flag.Args()
}
// setup readonly fs in spec // Setup readonly fs in spec.
spec.Root.Readonly = readonly spec.Root.Readonly = readonly
// setup tty in spec // Setup tty in spec.
spec.Process.Terminal = allocateTty spec.Process.Terminal = allocateTty
// pass in any hooks // Pass in any hooks to the spec.
spec.Hooks = hooks spec.Hooks = &hooks
// install the default apparmor profile // Set the default seccomp profile.
spec.Linux.Seccomp = defaultSeccompProfile
// Install the default apparmor profile.
if apparmor.IsEnabled() { if apparmor.IsEnabled() {
// check if we have the docker-default apparmor profile loaded // Check if we have the docker-default apparmor profile loaded.
if _, err := aaprofile.IsLoaded(defaultApparmorProfile); err != nil { if _, err := aaprofile.IsLoaded(defaultApparmorProfile); err != nil {
logrus.Warnf("AppArmor enabled on system but the %s profile is not loaded. apparmor_parser needs root to load a profile so we can't do it for you.", defaultApparmorProfile) logrus.Warnf("AppArmor enabled on system but the %s profile is not loaded. apparmor_parser needs root to load a profile so we can't do it for you.", defaultApparmorProfile)
} else { } else {
@ -190,45 +203,23 @@ func main() {
} }
} }
// set the CgroupsPath as this user // Unpack the rootfs.
u, err := user.CurrentUser()
if err != nil {
logrus.Fatal(err)
}
spec.Linux.CgroupsPath = sPtr(u.Name)
// setup UID mappings
spec.Linux.UIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Uid),
ContainerID: 0,
Size: 1,
},
}
// setup GID mappings
spec.Linux.GIDMappings = []specs.IDMapping{
{
HostID: uint32(u.Gid),
ContainerID: 0,
Size: 1,
},
}
if err := unpackRootfs(spec); err != nil { if err := unpackRootfs(spec); err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
status, err := startContainer(spec, containerID, pidFile, detach, useSystemdCgroup) // Start the container.
status, err := startContainer(spec, containerID, pidFile, consoleSocket, root, detach)
if err != nil { if err != nil {
logrus.Fatal(err) logrus.Fatal(err)
} }
// Remove the rootfs after the container has exited.
if err := os.RemoveAll(defaultRootfsDir); err != nil { if err := os.RemoveAll(defaultRootfsDir); err != nil {
logrus.Warnf("removing rootfs failed: %v", err) logrus.Warnf("removing rootfs failed: %v", err)
} }
// exit with the container's exit status // Exit with the container's exit status.
os.Exit(status) os.Exit(status)
} }

106
notify_socket.go Normal file
View file

@ -0,0 +1,106 @@
package main
import (
"bytes"
"fmt"
"net"
"os"
"path/filepath"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// notifySocket proxies sd_notify messages written by a container to the
// notification socket named by NOTIFY_SOCKET in our own environment.
type notifySocket struct {
	// socket is the listening datagram socket; it stays nil until
	// setupSocket succeeds.
	socket *net.UnixConn
	// host is the value of NOTIFY_SOCKET read when the struct was created.
	host string
	// socketPath is the proxy socket location: <root>/<id>/notify.sock.
	socketPath string
}

// newNotifySocket returns a notifySocket for the container id whose state
// lives under root. It returns nil when NOTIFY_SOCKET is unset or empty.
// No socket is opened here.
func newNotifySocket(id, root string) *notifySocket {
	// Read the variable once so the emptiness check and the stored value
	// can never disagree.
	host := os.Getenv("NOTIFY_SOCKET")
	if host == "" {
		return nil
	}

	return &notifySocket{
		host:       host,
		socketPath: filepath.Join(root, id, "notify.sock"),
	}
}

// Close closes the listening socket. It is a no-op returning nil when the
// receiver is nil or the socket was never opened.
func (s *notifySocket) Close() error {
	if s == nil || s.socket == nil {
		return nil
	}
	return s.socket.Close()
}
// setupSpec wires the notify socket into the container spec so that the
// sd_notify protocol can be used from inside the container: the proxy socket
// (s.socketPath) is bind-mounted at the host's NOTIFY_SOCKET path and the
// same path is exported to the container process as NOTIFY_SOCKET.
func (s *notifySocket) setupSpec(spec *specs.Spec) {
	spec.Mounts = append(spec.Mounts, specs.Mount{
		Destination: s.host,
		Type:        "bind",
		Source:      s.socketPath,
		Options:     []string{"bind"},
	})

	env := fmt.Sprintf("NOTIFY_SOCKET=%s", s.host)
	spec.Process.Env = append(spec.Process.Env, env)
}
// setupSocket opens a unixgram listener at s.socketPath and stores it in
// s.socket. When listening fails the error is returned and s.socket is left
// unchanged.
func (s *notifySocket) setupSocket() error {
	conn, err := net.ListenUnixgram("unixgram", &net.UnixAddr{
		Name: s.socketPath,
		Net:  "unixgram",
	})
	if err != nil {
		return err
	}

	s.socket = conn
	return nil
}
// run relays the container's readiness notification to the host's notify
// socket. It reads datagrams from s.socket until one contains a line starting
// with "READY=", forwards that line (newline-terminated) to s.host, and then
// returns. It also returns when reading from s.socket fails, for example
// after the socket has been closed.
//
// pid1 must be set only with -d: when it is positive, a MAINPID message is
// sent after READY so that systemd treats that pid as the main process of
// the service.
func (s *notifySocket) run(pid1 int) {
	hostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
	client, err := net.DialUnix("unixgram", nil, &hostAddr)
	if err != nil {
		logrus.Error(err)
		return
	}
	// Release the outgoing connection on every exit path.
	defer client.Close()

	buf := make([]byte, 512)
	for {
		n, err := s.socket.Read(buf)
		if err != nil {
			return
		}

		for _, line := range bytes.Split(buf[:n], []byte{'\n'}) {
			if !bytes.HasPrefix(line, []byte("READY=")) {
				continue
			}

			// Forward only the READY line, newline-terminated.
			msg := make([]byte, 0, len(line)+1)
			msg = append(msg, line...)
			msg = append(msg, '\n')
			if _, err := client.Write(msg); err != nil {
				logrus.Error(err)
				return
			}

			// Now we can inform systemd to use pid1 as the pid to monitor.
			if pid1 > 0 {
				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
				if _, err := client.Write([]byte(newPid)); err != nil {
					logrus.Error(err)
				}
			}
			return
		}
	}
}

View file

@ -43,7 +43,7 @@ var rlimitMap = map[string]int{
func strToRlimit(key string) (int, error) { func strToRlimit(key string) (int, error) {
rl, ok := rlimitMap[key] rl, ok := rlimitMap[key]
if !ok { if !ok {
return 0, fmt.Errorf("Wrong rlimit value: %s", key) return 0, fmt.Errorf("wrong rlimit value: %s", key)
} }
return rl, nil return rl, nil
} }

View file

@ -22,7 +22,7 @@ func unpackRootfs(spec *specs.Spec) error {
} }
r := bytes.NewReader(data) r := bytes.NewReader(data)
if err := archive.Untar(r, defaultRootfsDir, nil); err != nil { if err := archive.Untar(r, defaultRootfsDir, &archive.TarOptions{NoLchown: true}); err != nil {
return err return err
} }

1272
seccomp.go

File diff suppressed because it is too large Load diff

View file

@ -3,19 +3,23 @@ package main
import ( import (
"os" "os"
"os/signal" "os/signal"
"syscall" "syscall" // only for Signal
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
) )
const signalBufferSize = 2048 const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals // newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process. // while still forwarding all other signals to the process.
func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler { // If notifySocket is present, use it to read systemd notifications from the container and
// forward them to notifySocketHost.
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler {
if enableSubreaper { if enableSubreaper {
// set us as the subreaper before registering the signal handler for the container // set us as the subreaper before registering the signal handler for the container
if err := system.SetSubreaper(1); err != nil { if err := system.SetSubreaper(1); err != nil {
@ -28,8 +32,8 @@ func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
// handle all signals for the process. // handle all signals for the process.
signal.Notify(s) signal.Notify(s)
return &signalHandler{ return &signalHandler{
tty: tty,
signals: s, signals: s,
notifySocket: notifySocket,
} }
} }
@ -42,25 +46,42 @@ type exit struct {
type signalHandler struct { type signalHandler struct {
signals chan os.Signal signals chan os.Signal
tty *tty notifySocket *notifySocket
} }
// forward handles the main signal event loop forwarding, resizing, or reaping depending // forward handles the main signal event loop forwarding, resizing, or reaping depending
// on the signal received. // on the signal received.
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) { func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach bool) (int, error) {
// make sure we know the pid of our main process so that we can return // make sure we know the pid of our main process so that we can return
// after it dies. // after it dies.
if detach && h.notifySocket == nil {
return 0, nil
}
pid1, err := process.Pid() pid1, err := process.Pid()
if err != nil { if err != nil {
return -1, err return -1, err
} }
// perform the initial tty resize.
h.tty.resize() if h.notifySocket != nil {
if detach {
h.notifySocket.run(pid1)
return 0, nil
} else {
go h.notifySocket.run(0)
}
}
// Perform the initial tty resize. Always ignore errors resizing because
// stdout might have disappeared (due to races with when SIGHUP is sent).
_ = tty.resize()
// Handle and forward signals.
for s := range h.signals { for s := range h.signals {
switch s { switch s {
case syscall.SIGWINCH: case unix.SIGWINCH:
h.tty.resize() // Ignore errors resizing, as above.
case syscall.SIGCHLD: _ = tty.resize()
case unix.SIGCHLD:
exits, err := h.reap() exits, err := h.reap()
if err != nil { if err != nil {
logrus.Error(err) logrus.Error(err)
@ -75,12 +96,15 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
// status because we must ensure that any of the go specific process // status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return. // fun such as flushing pipes are complete before we return.
process.Wait() process.Wait()
if h.notifySocket != nil {
h.notifySocket.Close()
}
return e.status, nil return e.status, nil
} }
} }
default: default:
logrus.Debugf("sending signal to process %s", s) logrus.Debugf("sending signal to process %s", s)
if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil { if err := unix.Kill(pid1, s.(syscall.Signal)); err != nil {
logrus.Error(err) logrus.Error(err)
} }
} }
@ -92,13 +116,13 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
// then returns all exits to the main event loop for further processing. // then returns all exits to the main event loop for further processing.
func (h *signalHandler) reap() (exits []exit, err error) { func (h *signalHandler) reap() (exits []exit, err error) {
var ( var (
ws syscall.WaitStatus ws unix.WaitStatus
rus syscall.Rusage rus unix.Rusage
) )
for { for {
pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus) pid, err := unix.Wait4(-1, &ws, unix.WNOHANG, &rus)
if err != nil { if err != nil {
if err == syscall.ECHILD { if err == unix.ECHILD {
return exits, nil return exits, nil
} }
return nil, err return nil, err

166
spec.go
View file

@ -1,166 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"os"
"runtime"
"github.com/opencontainers/runtime-spec/specs-go"
)
// spec is the package-level default runtime specification. Everything in it
// is a literal value except the platform, which is read from the runtime
// package, and the seccomp profile, which comes from defaultSeccompProfile
// (defined elsewhere in the package).
var (
spec = &specs.Spec{
Version: specs.Version,
Platform: specs.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
// Root filesystem lives in the relative directory "rootfs" and is
// read-only by default.
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
// Default process: "sh" with a terminal, working directory "/", and
// no_new_privs set.
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
Cwd: "/",
NoNewPrivileges: true,
// Capabilities listed for the process.
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
// Open-file limit: soft and hard both 1024.
Rlimits: []specs.Rlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
},
Hostname: "ctr",
// Mounts for /proc, /dev (and its pts, shm, mqueue children), /sys and
// the cgroup filesystem.
Mounts: []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: nil,
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime"},
},
},
Linux: specs.Linux{
// /proc entries listed as masked paths.
MaskedPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_stats",
"/proc/sched_debug",
},
// /proc entries listed as read-only paths.
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Resources: &specs.Resources{
// Single device cgroup rule with Allow=false and access "rwm".
// NOTE(review): sPtr is defined elsewhere; presumably it returns a
// pointer to its string argument — confirm.
Devices: []specs.DeviceCgroup{
{
Allow: false,
Access: sPtr("rwm"),
},
},
},
// Namespaces requested for the container, including a user namespace.
Namespaces: []specs.Namespace{
{
Type: "pid",
},
{
Type: "ipc",
},
{
Type: "network",
},
{
Type: "user",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
Seccomp: defaultSeccompProfile,
},
}
)
// loadSpec reads the JSON runtime specification stored at cPath and decodes
// it into a *specs.Spec. A missing file is reported with a dedicated
// "not found" error; any other open or decode failure is returned as-is.
// No default path is substituted when cPath is empty.
func loadSpec(cPath string) (spec *specs.Spec, err error) {
	f, openErr := os.Open(cPath)
	switch {
	case openErr == nil:
		// Opened successfully; fall through to decoding.
	case os.IsNotExist(openErr):
		return nil, fmt.Errorf("JSON specification file %s not found", cPath)
	default:
		return nil, openErr
	}
	defer f.Close()

	if decodeErr := json.NewDecoder(f).Decode(&spec); decodeErr != nil {
		return nil, decodeErr
	}
	return spec, nil
}

125
tty.go
View file

@ -4,16 +4,34 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"os/signal"
"sync" "sync"
"github.com/docker/docker/pkg/term" "github.com/containerd/console"
"github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
) )
// setup standard pipes so that the TTY of the calling runc process type tty struct {
// is not inherited by the container. epoller *console.Epoller
func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) { console *console.EpollConsole
i, err := p.InitializeIO(rootuid) stdin console.Console
closers []io.Closer
postStart []io.Closer
wg sync.WaitGroup
consoleC chan error
}
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
defer t.wg.Done()
io.Copy(w, r)
r.Close()
}
// setup pipes for the process so that advanced features like c/r are able to easily checkpoint
// and restore the process's IO without depending on a host specific path or device
func setupProcessPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) {
i, err := p.InitializeIO(rootuid, rootgid)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -44,45 +62,66 @@ func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
return t, nil return t, nil
} }
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) { func inheritStdio(process *libcontainer.Process) error {
defer t.wg.Done() process.Stdin = os.Stdin
io.Copy(w, r) process.Stdout = os.Stdout
r.Close() process.Stderr = os.Stderr
return nil
} }
func createTty(p *libcontainer.Process, rootuid int, consolePath string) (*tty, error) { func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) error {
if consolePath != "" { f, err := utils.RecvFd(socket)
if err := p.ConsoleFromPath(consolePath); err != nil {
return nil, err
}
return &tty{}, nil
}
console, err := p.NewConsole(rootuid)
if err != nil { if err != nil {
return nil, err return err
} }
go io.Copy(console, os.Stdin) cons, err := console.ConsoleFromFile(f)
go io.Copy(os.Stdout, console) if err != nil {
return err
}
console.ClearONLCR(cons.Fd())
epoller, err := console.NewEpoller()
if err != nil {
return err
}
epollConsole, err := epoller.Add(cons)
if err != nil {
return err
}
go epoller.Wait()
go io.Copy(epollConsole, os.Stdin)
t.wg.Add(1)
go t.copyIO(os.Stdout, epollConsole)
state, err := term.SetRawTerminal(os.Stdin.Fd()) // set raw mode to stdin and also handle interrupt
stdin, err := console.ConsoleFromFile(os.Stdin)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err) return err
} }
return &tty{ if err := stdin.SetRaw(); err != nil {
console: console, return fmt.Errorf("failed to set the terminal from the stdin: %v", err)
state: state, }
closers: []io.Closer{ go handleInterrupt(stdin)
console,
}, t.epoller = epoller
}, nil t.stdin = stdin
t.console = epollConsole
t.closers = []io.Closer{epollConsole}
return nil
} }
type tty struct { func handleInterrupt(c console.Console) {
console libcontainer.Console sigchan := make(chan os.Signal, 1)
state *term.State signal.Notify(sigchan, os.Interrupt)
closers []io.Closer <-sigchan
postStart []io.Closer c.Reset()
wg sync.WaitGroup os.Exit(0)
}
func (t *tty) waitConsole() error {
if t.consoleC != nil {
return <-t.consoleC
}
return nil
} }
// ClosePostStart closes any fds that are provided to the container and dup2'd // ClosePostStart closes any fds that are provided to the container and dup2'd
@ -101,13 +140,17 @@ func (t *tty) Close() error {
for _, c := range t.postStart { for _, c := range t.postStart {
c.Close() c.Close()
} }
// wait for the copy routines to finish before closing the fds // the process is gone at this point, shutting down the console if we have
// one and wait for all IO to be finished
if t.console != nil && t.epoller != nil {
t.console.Shutdown(t.epoller.CloseConsole)
}
t.wg.Wait() t.wg.Wait()
for _, c := range t.closers { for _, c := range t.closers {
c.Close() c.Close()
} }
if t.state != nil { if t.stdin != nil {
term.RestoreTerminal(os.Stdin.Fd(), t.state) t.stdin.Reset()
} }
return nil return nil
} }
@ -116,9 +159,5 @@ func (t *tty) resize() error {
if t.console == nil { if t.console == nil {
return nil return nil
} }
ws, err := term.GetWinsize(os.Stdin.Fd()) return t.console.ResizeFrom(console.Current())
if err != nil {
return err
}
return term.SetWinsize(t.console.Fd(), ws)
} }

269
utils.go
View file

@ -2,74 +2,91 @@ package main
import ( import (
"fmt" "fmt"
"net"
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"syscall" "strconv"
"github.com/Sirupsen/logrus"
"github.com/coreos/go-systemd/activation" "github.com/coreos/go-systemd/activation"
"github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv" "github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runc/libcontainer/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
) )
// startContainer starts the container. Returns the exit status or -1 and an // startContainer starts the container. Returns the exit status or -1 and an
// error. Signals sent to the current process will be forwarded to container. // error. Signals sent to the current process will be forwarded to container.
func startContainer(spec *specs.Spec, id, pidFile string, detach, useSystemdCgroup bool) (int, error) { func startContainer(spec *specs.Spec, id, pidFile, consoleSocket, root string, detach bool) (int, error) {
// create the libcontainer config notifySocket := newNotifySocket(id, root)
if notifySocket != nil {
// Setup the spec for the notify socket.
notifySocket.setupSpec(spec)
}
// Create the libcontainer config.
useSystemdCgroup := false
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id, CgroupName: id,
UseSystemdCgroup: useSystemdCgroup, UseSystemdCgroup: useSystemdCgroup,
NoPivotRoot: false, NoPivotRoot: false,
NoNewKeyring: false,
Spec: spec, Spec: spec,
Rootless: true,
}) })
if err != nil { if err != nil {
return -1, err return -1, err
} }
if _, err := os.Stat(config.Rootfs); err != nil { // Load the factory.
if os.IsNotExist(err) { factory, err := loadFactory(root, useSystemdCgroup)
return -1, fmt.Errorf("rootfs (%q) does not exist", config.Rootfs)
}
return -1, err
}
factory, err := loadFactory(useSystemdCgroup)
if err != nil { if err != nil {
return -1, err return -1, err
} }
// Create the factory.
container, err := factory.Create(id, config) container, err := factory.Create(id, config)
if err != nil { if err != nil {
return -1, err return -1, err
} }
// Support on-demand socket activation by passing file descriptors into the container init process. if notifySocket != nil {
// Setup the socket for the notify socket.
err := notifySocket.setupSocket()
if err != nil {
return -1, err
}
}
// Support on-demand socket activation by passing file descriptors into
// the container init process.
listenFDs := []*os.File{} listenFDs := []*os.File{}
if os.Getenv("LISTEN_FDS") != "" { if os.Getenv("LISTEN_FDS") != "" {
listenFDs = activation.Files(false) listenFDs = activation.Files(false)
} }
// Initialize the runner.
r := &runner{ r := &runner{
enableSubreaper: true, enableSubreaper: true,
shouldDestroy: true, shouldDestroy: true,
container: container, container: container,
console: console, listenFDs: listenFDs,
notifySocket: notifySocket,
consoleSocket: consoleSocket,
detach: detach, detach: detach,
pidFile: pidFile, pidFile: pidFile,
listenFDs: listenFDs,
} }
return r.run(&spec.Process) // Run the process.
return r.run(spec.Process)
} }
// loadFactory returns the configured factory instance for execing containers. // loadFactory returns the configured factory instance for execing containers.
func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) { func loadFactory(root string, useSystemdCgroup bool) (libcontainer.Factory, error) {
abs, err := filepath.Abs(root) // Setup the cgroups manager. Default is cgroupfs.
if err != nil {
return nil, err
}
cgroupManager := libcontainer.Cgroupfs cgroupManager := libcontainer.Cgroupfs
if useSystemdCgroup { if useSystemdCgroup {
if systemd.UseSystemd() { if systemd.UseSystemd() {
@ -78,25 +95,61 @@ func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available") return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
} }
} }
return libcontainer.New(abs, cgroupManager, func(l *libcontainer.LinuxFactory) error {
return nil // We resolve the paths for {newuidmap,newgidmap} from the context of runc,
}) // to avoid doing a path lookup in the nsexec context. TODO: The binary
// names are not currently configurable.
newuidmap, err := exec.LookPath("newuidmap")
if err != nil {
newuidmap = ""
}
newgidmap, err := exec.LookPath("newgidmap")
if err != nil {
newgidmap = ""
}
// Create the new libcontainer factory.
return libcontainer.New(root, cgroupManager, nil, nil,
libcontainer.NewuidmapPath(newuidmap),
libcontainer.NewgidmapPath(newgidmap))
} }
// newProcess returns a new libcontainer Process with the arguments from the // newProcess returns a new libcontainer Process with the arguments from the
// spec and stdio from the current process. // spec and stdio from the current process.
func newProcess(p specs.Process) (*libcontainer.Process, error) { func newProcess(p specs.Process) (*libcontainer.Process, error) {
// Create the libcontainer process.
lp := &libcontainer.Process{ lp := &libcontainer.Process{
Args: p.Args, Args: p.Args,
Env: p.Env, Env: p.Env,
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID), User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
Cwd: p.Cwd, Cwd: p.Cwd,
Capabilities: p.Capabilities,
Label: p.SelinuxLabel, Label: p.SelinuxLabel,
NoNewPrivileges: &p.NoNewPrivileges, NoNewPrivileges: &p.NoNewPrivileges,
AppArmorProfile: p.ApparmorProfile, AppArmorProfile: p.ApparmorProfile,
} }
// Setup the console size.
if p.ConsoleSize != nil {
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}
// Convert the capabilities.
if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
lp.Capabilities.Effective = p.Capabilities.Effective
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
lp.Capabilities.Permitted = p.Capabilities.Permitted
lp.Capabilities.Ambient = p.Capabilities.Ambient
}
// Setup the additional user groups.
for _, gid := range p.User.AdditionalGids {
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
}
// Setup the Rlimits.
for _, rlimit := range p.Rlimits { for _, rlimit := range p.Rlimits {
rl, err := createLibContainerRlimit(rlimit) rl, err := createLibContainerRlimit(rlimit)
if err != nil { if err != nil {
@ -104,23 +157,8 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
} }
lp.Rlimits = append(lp.Rlimits, rl) lp.Rlimits = append(lp.Rlimits, rl)
} }
return lp, nil
}
func dupStdio(process *libcontainer.Process, rootuid int) error { return lp, nil
process.Stdin = os.Stdin
process.Stdout = os.Stdout
process.Stderr = os.Stderr
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stdout.Fd(),
os.Stderr.Fd(),
} {
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
return err
}
}
return nil
} }
func destroy(container libcontainer.Container) { func destroy(container libcontainer.Container) {
@ -129,24 +167,55 @@ func destroy(container libcontainer.Container) {
} }
} }
// setupIO sets the proper IO on the process depending on the configuration func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
// If there is a nil error then there must be a non nil tty returned
func setupIO(process *libcontainer.Process, rootuid int, console string, createTTY, detach bool) (*tty, error) {
// detach and createTty will not work unless a console path is passed
// so error out here before changing any terminal settings
if createTTY && detach && console == "" {
return nil, fmt.Errorf("cannot allocate tty if runc will detach")
}
if createTTY { if createTTY {
return createTty(process, rootuid, console) process.Stdin = nil
process.Stdout = nil
process.Stderr = nil
t := &tty{}
if !detach {
parent, child, err := utils.NewSockPair("console")
if err != nil {
return nil, err
} }
process.ConsoleSocket = child
t.postStart = append(t.postStart, parent, child)
t.consoleC = make(chan error, 1)
go func() {
if err := t.recvtty(process, parent); err != nil {
t.consoleC <- err
}
t.consoleC <- nil
}()
} else {
// the caller of runc will handle receiving the console master
conn, err := net.Dial("unix", sockpath)
if err != nil {
return nil, err
}
uc, ok := conn.(*net.UnixConn)
if !ok {
return nil, fmt.Errorf("casting to UnixConn failed")
}
t.postStart = append(t.postStart, uc)
socket, err := uc.File()
if err != nil {
return nil, err
}
t.postStart = append(t.postStart, socket)
process.ConsoleSocket = socket
}
return t, nil
}
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
// and the container's process inherits runc's stdio.
if detach { if detach {
if err := dupStdio(process, rootuid); err != nil { if err := inheritStdio(process); err != nil {
return nil, err return nil, err
} }
return &tty{}, nil return &tty{}, nil
} }
return createStdioPipes(process, rootuid) return setupProcessPipes(process, rootuid, rootgid)
} }
// createPidFile creates a file with the processes pid inside it atomically // createPidFile creates a file with the processes pid inside it atomically
@ -175,46 +244,86 @@ func createPidFile(path string, process *libcontainer.Process) error {
type runner struct { type runner struct {
enableSubreaper bool enableSubreaper bool
shouldDestroy bool
detach bool detach bool
listenFDs []*os.File shouldDestroy bool
consoleSocket string
pidFile string pidFile string
console string
container libcontainer.Container container libcontainer.Container
listenFDs []*os.File
notifySocket *notifySocket
} }
func (r *runner) run(config *specs.Process) (int, error) { func (r *runner) run(config *specs.Process) (int, error) {
// Check the terminal settings.
if r.detach && config.Terminal && r.consoleSocket == "" {
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
}
if (!r.detach || !config.Terminal) && r.consoleSocket != "" {
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
}
// Create the process.
process, err := newProcess(*config) process, err := newProcess(*config)
if err != nil { if err != nil {
r.destroy() r.destroy()
return -1, err return -1, err
} }
// Setup the listen file descriptors.
if len(r.listenFDs) > 0 { if len(r.listenFDs) > 0 {
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1") process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
} }
rootuid, err := r.container.Config().HostUID()
// Get the rootuid.
rootuid, err := r.container.Config().HostRootUID()
if err != nil { if err != nil {
r.destroy() r.destroy()
return -1, err return -1, err
} }
tty, err := setupIO(process, rootuid, r.console, config.Terminal, r.detach)
// Get the rootgid.
rootgid, err := r.container.Config().HostRootGID()
if err != nil { if err != nil {
r.destroy() r.destroy()
return -1, err return -1, err
} }
handler := newSignalHandler(tty, r.enableSubreaper)
if err := r.container.Start(process); err != nil { // Setting up IO is a two stage process. We need to modify process to deal
// with detaching containers, and then we get a tty after the container has
// started.
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, r.detach, r.consoleSocket)
if err != nil {
r.destroy()
return -1, err
}
defer tty.Close()
// Run the container.
if err := r.container.Run(process); err != nil {
r.destroy() r.destroy()
tty.Close() tty.Close()
return -1, err return -1, err
} }
if err := tty.ClosePostStart(); err != nil {
// Wait for the tty.
if err := tty.waitConsole(); err != nil {
r.terminate(process) r.terminate(process)
r.destroy() r.destroy()
tty.Close() tty.Close()
return -1, err return -1, err
} }
// Close the tty's post-start resources.
if err = tty.ClosePostStart(); err != nil {
r.terminate(process)
r.destroy()
tty.Close()
return -1, err
}
// Create the pid file.
if r.pidFile != "" { if r.pidFile != "" {
if err := createPidFile(r.pidFile, process); err != nil { if err := createPidFile(r.pidFile, process); err != nil {
r.terminate(process) r.terminate(process)
@ -223,16 +332,21 @@ func (r *runner) run(config *specs.Process) (int, error) {
return -1, err return -1, err
} }
} }
if r.detach {
tty.Close() // Forward the handler.
return 0, nil status, err := handler.forward(process, tty, detach)
}
status, err := handler.forward(process)
if err != nil { if err != nil {
r.terminate(process) r.terminate(process)
} }
// Return early if we are detaching.
if r.detach {
return 0, nil
}
// Cleanup.
r.destroy() r.destroy()
tty.Close()
return status, err return status, err
} }
@ -243,27 +357,18 @@ func (r *runner) destroy() {
} }
func (r *runner) terminate(p *libcontainer.Process) { func (r *runner) terminate(p *libcontainer.Process) {
p.Signal(syscall.SIGKILL) _ = p.Signal(unix.SIGKILL)
p.Wait() _, _ = p.Wait()
} }
func sPtr(s string) *string { return &s } func createLibContainerRlimit(rlimit specs.POSIXRlimit) (configs.Rlimit, error) {
func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
rl, err := strToRlimit(rlimit.Type) rl, err := strToRlimit(rlimit.Type)
if err != nil { if err != nil {
return configs.Rlimit{}, err return configs.Rlimit{}, err
} }
return configs.Rlimit{ return configs.Rlimit{
Type: rl, Type: rl,
Hard: uint64(rlimit.Hard), Hard: rlimit.Hard,
Soft: uint64(rlimit.Soft), Soft: rlimit.Soft,
}, nil }, nil
} }
// If systemd supports the sd_notify protocol, this function adds support
// for the sd_notify protocol from within the container.
func setupSdNotify(spec *specs.Spec, notifySocket string) {
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
}