update main to use vendor
Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
This commit is contained in:
parent
639756e8c6
commit
e346c2e0ba
9 changed files with 1102 additions and 1003 deletions
99
main.go
99
main.go
|
@ -5,16 +5,17 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
aaprofile "github.com/docker/docker/profiles/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -40,15 +41,14 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
console = os.Getenv("console")
|
||||
containerID string
|
||||
pidFile string
|
||||
root string
|
||||
|
||||
allocateTty bool
|
||||
detach bool
|
||||
readonly bool
|
||||
useSystemdCgroup bool
|
||||
allocateTty bool
|
||||
consoleSocket string
|
||||
detach bool
|
||||
readonly bool
|
||||
|
||||
hooks specs.Hooks
|
||||
hookflags stringSlice
|
||||
|
@ -113,16 +113,14 @@ func (s stringSlice) ParseHooks() (hooks specs.Hooks, err error) {
|
|||
func init() {
|
||||
// Parse flags
|
||||
flag.StringVar(&containerID, "id", IMAGE, "container ID")
|
||||
flag.StringVar(&console, "console", console, "the pty slave path for use with the container")
|
||||
flag.StringVar(&pidFile, "pid-file", "", "specify the file to write the process id to")
|
||||
flag.StringVar(&root, "root", defaultRoot, "root directory of container state, should be tmpfs")
|
||||
|
||||
flag.Var(&hookflags, "hook", "Hooks to prefill into spec file. (ex. --hook prestart:netns)")
|
||||
|
||||
flag.BoolVar(&allocateTty, "t", true, "allocate a tty for the container")
|
||||
flag.StringVar(&consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
|
||||
flag.BoolVar(&detach, "d", false, "detach from the container's process")
|
||||
// TODO (jess): do not enable this flag, the error is very gross on systemd
|
||||
// flag.BoolVar(&useSystemdCgroup, "systemd-cgroup", false, "enable systemd cgroup support")
|
||||
flag.BoolVar(&readonly, "read-only", false, "make container filesystem readonly")
|
||||
|
||||
flag.BoolVar(&version, "version", false, "print version and exit")
|
||||
|
@ -141,17 +139,33 @@ func init() {
|
|||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Set log level
|
||||
// Set log level.
|
||||
if debug {
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
}
|
||||
|
||||
// parse the hook flags
|
||||
// Parse the hook flags.
|
||||
var err error
|
||||
hooks, err = hookflags.ParseHooks()
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
// Convert pid-file to an absolute path so we can write to the
|
||||
// right file after chdir to bundle.
|
||||
if pidFile != "" {
|
||||
pidFile, err = filepath.Abs(pidFile)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Get the absolute path to the root.
|
||||
root, err = filepath.Abs(root)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//go:generate go run generate.go
|
||||
|
@ -161,28 +175,27 @@ func main() {
|
|||
return
|
||||
}
|
||||
|
||||
notifySocket := os.Getenv("NOTIFY_SOCKET")
|
||||
if notifySocket != "" {
|
||||
setupSdNotify(spec, notifySocket)
|
||||
}
|
||||
// Initialize the spec.
|
||||
spec := specconv.Example()
|
||||
|
||||
// override the cmd in the spec with any args specified
|
||||
if len(flag.Args()) > 0 {
|
||||
spec.Process.Args = flag.Args()
|
||||
}
|
||||
// Set the spec to be rootless.
|
||||
specconv.ToRootless(spec)
|
||||
|
||||
// setup readonly fs in spec
|
||||
// Setup readonly fs in spec.
|
||||
spec.Root.Readonly = readonly
|
||||
|
||||
// setup tty in spec
|
||||
// Setup tty in spec.
|
||||
spec.Process.Terminal = allocateTty
|
||||
|
||||
// pass in any hooks
|
||||
spec.Hooks = hooks
|
||||
// Pass in any hooks to the spec.
|
||||
spec.Hooks = &hooks
|
||||
|
||||
// install the default apparmor profile
|
||||
// Set the default seccomp profile.
|
||||
spec.Linux.Seccomp = defaultSeccompProfile
|
||||
|
||||
// Install the default apparmor profile.
|
||||
if apparmor.IsEnabled() {
|
||||
// check if we have the docker-default apparmor profile loaded
|
||||
// Check if we have the docker-default apparmor profile loaded.
|
||||
if _, err := aaprofile.IsLoaded(defaultApparmorProfile); err != nil {
|
||||
logrus.Warnf("AppArmor enabled on system but the %s profile is not loaded. apparmor_parser needs root to load a profile so we can't do it for you.", defaultApparmorProfile)
|
||||
} else {
|
||||
|
@ -190,45 +203,23 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
// set the CgroupsPath as this user
|
||||
u, err := user.CurrentUser()
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
spec.Linux.CgroupsPath = sPtr(u.Name)
|
||||
|
||||
// setup UID mappings
|
||||
spec.Linux.UIDMappings = []specs.IDMapping{
|
||||
{
|
||||
HostID: uint32(u.Uid),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
}
|
||||
|
||||
// setup GID mappings
|
||||
spec.Linux.GIDMappings = []specs.IDMapping{
|
||||
{
|
||||
HostID: uint32(u.Gid),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
}
|
||||
|
||||
// Unpack the rootfs.
|
||||
if err := unpackRootfs(spec); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
status, err := startContainer(spec, containerID, pidFile, detach, useSystemdCgroup)
|
||||
// Start the container.
|
||||
status, err := startContainer(spec, containerID, pidFile, consoleSocket, root, detach)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
// Remove the rootfs after the container has exited.
|
||||
if err := os.RemoveAll(defaultRootfsDir); err != nil {
|
||||
logrus.Warnf("removing rootfs failed: %v", err)
|
||||
}
|
||||
|
||||
// exit with the container's exit status
|
||||
// Exit with the container's exit status.
|
||||
os.Exit(status)
|
||||
}
|
||||
|
||||
|
|
106
notify_socket.go
Normal file
106
notify_socket.go
Normal file
|
@ -0,0 +1,106 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type notifySocket struct {
|
||||
socket *net.UnixConn
|
||||
host string
|
||||
socketPath string
|
||||
}
|
||||
|
||||
func newNotifySocket(id, root string) *notifySocket {
|
||||
if os.Getenv("NOTIFY_SOCKET") == "" {
|
||||
// Return early if we do not have a NOTIFY_SOCKET.
|
||||
return nil
|
||||
}
|
||||
|
||||
path := filepath.Join(filepath.Join(root, id), "notify.sock")
|
||||
|
||||
notifySocket := &notifySocket{
|
||||
socket: nil,
|
||||
host: os.Getenv("NOTIFY_SOCKET"),
|
||||
socketPath: path,
|
||||
}
|
||||
|
||||
return notifySocket
|
||||
}
|
||||
|
||||
func (s *notifySocket) Close() error {
|
||||
return s.socket.Close()
|
||||
}
|
||||
|
||||
// If systemd is supporting sd_notify protocol, this function will add support
|
||||
// for sd_notify protocol from within the container.
|
||||
func (s *notifySocket) setupSpec(spec *specs.Spec) {
|
||||
mount := specs.Mount{Destination: s.host, Type: "bind", Source: s.socketPath, Options: []string{"bind"}}
|
||||
spec.Mounts = append(spec.Mounts, mount)
|
||||
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
|
||||
}
|
||||
|
||||
func (s *notifySocket) setupSocket() error {
|
||||
addr := net.UnixAddr{
|
||||
Name: s.socketPath,
|
||||
Net: "unixgram",
|
||||
}
|
||||
|
||||
socket, err := net.ListenUnixgram("unixgram", &addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.socket = socket
|
||||
return nil
|
||||
}
|
||||
|
||||
// pid1 must be set only with -d, as it is used to set the new process as the main process
|
||||
// for the service in systemd
|
||||
func (notifySocket *notifySocket) run(pid1 int) {
|
||||
buf := make([]byte, 512)
|
||||
notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
|
||||
client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
return
|
||||
}
|
||||
for {
|
||||
r, err := notifySocket.socket.Read(buf)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
var out bytes.Buffer
|
||||
for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
||||
if bytes.HasPrefix(line, []byte("READY=")) {
|
||||
_, err = out.Write(line)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
_, err = out.Write([]byte{'\n'})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
_, err = client.Write(out.Bytes())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// now we can inform systemd to use pid1 as the pid to monitor
|
||||
if pid1 > 0 {
|
||||
newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||
client.Write([]byte(newPid))
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -43,7 +43,7 @@ var rlimitMap = map[string]int{
|
|||
func strToRlimit(key string) (int, error) {
|
||||
rl, ok := rlimitMap[key]
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("Wrong rlimit value: %s", key)
|
||||
return 0, fmt.Errorf("wrong rlimit value: %s", key)
|
||||
}
|
||||
return rl, nil
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ func unpackRootfs(spec *specs.Spec) error {
|
|||
}
|
||||
|
||||
r := bytes.NewReader(data)
|
||||
if err := archive.Untar(r, defaultRootfsDir, nil); err != nil {
|
||||
if err := archive.Untar(r, defaultRootfsDir, &archive.TarOptions{NoLchown: true}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
1272
seccomp.go
1272
seccomp.go
File diff suppressed because it is too large
Load diff
60
signals.go
60
signals.go
|
@ -3,19 +3,23 @@ package main
|
|||
import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"syscall" // only for Signal
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const signalBufferSize = 2048
|
||||
|
||||
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
|
||||
// while still forwarding all other signals to the process.
|
||||
func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
|
||||
// If notifySocket is present, use it to read systemd notifications from the container and
|
||||
// forward them to notifySocketHost.
|
||||
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler {
|
||||
if enableSubreaper {
|
||||
// set us as the subreaper before registering the signal handler for the container
|
||||
if err := system.SetSubreaper(1); err != nil {
|
||||
|
@ -28,8 +32,8 @@ func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
|
|||
// handle all signals for the process.
|
||||
signal.Notify(s)
|
||||
return &signalHandler{
|
||||
tty: tty,
|
||||
signals: s,
|
||||
signals: s,
|
||||
notifySocket: notifySocket,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,26 +45,43 @@ type exit struct {
|
|||
}
|
||||
|
||||
type signalHandler struct {
|
||||
signals chan os.Signal
|
||||
tty *tty
|
||||
signals chan os.Signal
|
||||
notifySocket *notifySocket
|
||||
}
|
||||
|
||||
// forward handles the main signal event loop forwarding, resizing, or reaping depending
|
||||
// on the signal received.
|
||||
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
|
||||
func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach bool) (int, error) {
|
||||
// make sure we know the pid of our main process so that we can return
|
||||
// after it dies.
|
||||
if detach && h.notifySocket == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
pid1, err := process.Pid()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
// perform the initial tty resize.
|
||||
h.tty.resize()
|
||||
|
||||
if h.notifySocket != nil {
|
||||
if detach {
|
||||
h.notifySocket.run(pid1)
|
||||
return 0, nil
|
||||
} else {
|
||||
go h.notifySocket.run(0)
|
||||
}
|
||||
}
|
||||
|
||||
// Perform the initial tty resize. Always ignore errors resizing because
|
||||
// stdout might have disappeared (due to races with when SIGHUP is sent).
|
||||
_ = tty.resize()
|
||||
// Handle and forward signals.
|
||||
for s := range h.signals {
|
||||
switch s {
|
||||
case syscall.SIGWINCH:
|
||||
h.tty.resize()
|
||||
case syscall.SIGCHLD:
|
||||
case unix.SIGWINCH:
|
||||
// Ignore errors resizing, as above.
|
||||
_ = tty.resize()
|
||||
case unix.SIGCHLD:
|
||||
exits, err := h.reap()
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
|
@ -75,12 +96,15 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
|
|||
// status because we must ensure that any of the go specific process
|
||||
// fun such as flushing pipes are complete before we return.
|
||||
process.Wait()
|
||||
if h.notifySocket != nil {
|
||||
h.notifySocket.Close()
|
||||
}
|
||||
return e.status, nil
|
||||
}
|
||||
}
|
||||
default:
|
||||
logrus.Debugf("sending signal to process %s", s)
|
||||
if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil {
|
||||
if err := unix.Kill(pid1, s.(syscall.Signal)); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
|
@ -92,13 +116,13 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
|
|||
// then returns all exits to the main event loop for further processing.
|
||||
func (h *signalHandler) reap() (exits []exit, err error) {
|
||||
var (
|
||||
ws syscall.WaitStatus
|
||||
rus syscall.Rusage
|
||||
ws unix.WaitStatus
|
||||
rus unix.Rusage
|
||||
)
|
||||
for {
|
||||
pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus)
|
||||
pid, err := unix.Wait4(-1, &ws, unix.WNOHANG, &rus)
|
||||
if err != nil {
|
||||
if err == syscall.ECHILD {
|
||||
if err == unix.ECHILD {
|
||||
return exits, nil
|
||||
}
|
||||
return nil, err
|
||||
|
|
166
spec.go
166
spec.go
|
@ -1,166 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
var (
|
||||
spec = &specs.Spec{
|
||||
Version: specs.Version,
|
||||
Platform: specs.Platform{
|
||||
OS: runtime.GOOS,
|
||||
Arch: runtime.GOARCH,
|
||||
},
|
||||
Root: specs.Root{
|
||||
Path: "rootfs",
|
||||
Readonly: true,
|
||||
},
|
||||
Process: specs.Process{
|
||||
Terminal: true,
|
||||
User: specs.User{},
|
||||
Args: []string{
|
||||
"sh",
|
||||
},
|
||||
Env: []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm",
|
||||
},
|
||||
Cwd: "/",
|
||||
NoNewPrivileges: true,
|
||||
Capabilities: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Rlimits: []specs.Rlimit{
|
||||
{
|
||||
Type: "RLIMIT_NOFILE",
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
},
|
||||
Hostname: "ctr",
|
||||
Mounts: []specs.Mount{
|
||||
{
|
||||
Destination: "/proc",
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
{
|
||||
Destination: "/dev",
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/pts",
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/shm",
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/mqueue",
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys",
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "ro"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime"},
|
||||
},
|
||||
},
|
||||
Linux: specs.Linux{
|
||||
MaskedPaths: []string{
|
||||
"/proc/kcore",
|
||||
"/proc/latency_stats",
|
||||
"/proc/timer_stats",
|
||||
"/proc/sched_debug",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/asound",
|
||||
"/proc/bus",
|
||||
"/proc/fs",
|
||||
"/proc/irq",
|
||||
"/proc/sys",
|
||||
"/proc/sysrq-trigger",
|
||||
},
|
||||
Resources: &specs.Resources{
|
||||
Devices: []specs.DeviceCgroup{
|
||||
{
|
||||
Allow: false,
|
||||
Access: sPtr("rwm"),
|
||||
},
|
||||
},
|
||||
},
|
||||
Namespaces: []specs.Namespace{
|
||||
{
|
||||
Type: "pid",
|
||||
},
|
||||
{
|
||||
Type: "ipc",
|
||||
},
|
||||
{
|
||||
Type: "network",
|
||||
},
|
||||
{
|
||||
Type: "user",
|
||||
},
|
||||
{
|
||||
Type: "uts",
|
||||
},
|
||||
{
|
||||
Type: "mount",
|
||||
},
|
||||
},
|
||||
Seccomp: defaultSeccompProfile,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// loadSpec loads the specification from the provided path.
|
||||
// If the path is empty then the default path will be "config.json"
|
||||
func loadSpec(cPath string) (spec *specs.Spec, err error) {
|
||||
cf, err := os.Open(cPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("JSON specification file %s not found", cPath)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer cf.Close()
|
||||
|
||||
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return spec, nil
|
||||
}
|
125
tty.go
125
tty.go
|
@ -4,16 +4,34 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
|
||||
"github.com/docker/docker/pkg/term"
|
||||
"github.com/containerd/console"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// setup standard pipes so that the TTY of the calling runc process
|
||||
// is not inherited by the container.
|
||||
func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
|
||||
i, err := p.InitializeIO(rootuid)
|
||||
type tty struct {
|
||||
epoller *console.Epoller
|
||||
console *console.EpollConsole
|
||||
stdin console.Console
|
||||
closers []io.Closer
|
||||
postStart []io.Closer
|
||||
wg sync.WaitGroup
|
||||
consoleC chan error
|
||||
}
|
||||
|
||||
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
|
||||
defer t.wg.Done()
|
||||
io.Copy(w, r)
|
||||
r.Close()
|
||||
}
|
||||
|
||||
// setup pipes for the process so that advanced features like c/r are able to easily checkpoint
|
||||
// and restore the process's IO without depending on a host specific path or device
|
||||
func setupProcessPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) {
|
||||
i, err := p.InitializeIO(rootuid, rootgid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -44,45 +62,66 @@ func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
|
|||
return t, nil
|
||||
}
|
||||
|
||||
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
|
||||
defer t.wg.Done()
|
||||
io.Copy(w, r)
|
||||
r.Close()
|
||||
func inheritStdio(process *libcontainer.Process) error {
|
||||
process.Stdin = os.Stdin
|
||||
process.Stdout = os.Stdout
|
||||
process.Stderr = os.Stderr
|
||||
return nil
|
||||
}
|
||||
|
||||
func createTty(p *libcontainer.Process, rootuid int, consolePath string) (*tty, error) {
|
||||
if consolePath != "" {
|
||||
if err := p.ConsoleFromPath(consolePath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tty{}, nil
|
||||
}
|
||||
console, err := p.NewConsole(rootuid)
|
||||
func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) error {
|
||||
f, err := utils.RecvFd(socket)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
go io.Copy(console, os.Stdin)
|
||||
go io.Copy(os.Stdout, console)
|
||||
cons, err := console.ConsoleFromFile(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
console.ClearONLCR(cons.Fd())
|
||||
epoller, err := console.NewEpoller()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
epollConsole, err := epoller.Add(cons)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go epoller.Wait()
|
||||
go io.Copy(epollConsole, os.Stdin)
|
||||
t.wg.Add(1)
|
||||
go t.copyIO(os.Stdout, epollConsole)
|
||||
|
||||
state, err := term.SetRawTerminal(os.Stdin.Fd())
|
||||
// set raw mode to stdin and also handle interrupt
|
||||
stdin, err := console.ConsoleFromFile(os.Stdin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err)
|
||||
return err
|
||||
}
|
||||
return &tty{
|
||||
console: console,
|
||||
state: state,
|
||||
closers: []io.Closer{
|
||||
console,
|
||||
},
|
||||
}, nil
|
||||
if err := stdin.SetRaw(); err != nil {
|
||||
return fmt.Errorf("failed to set the terminal from the stdin: %v", err)
|
||||
}
|
||||
go handleInterrupt(stdin)
|
||||
|
||||
t.epoller = epoller
|
||||
t.stdin = stdin
|
||||
t.console = epollConsole
|
||||
t.closers = []io.Closer{epollConsole}
|
||||
return nil
|
||||
}
|
||||
|
||||
type tty struct {
|
||||
console libcontainer.Console
|
||||
state *term.State
|
||||
closers []io.Closer
|
||||
postStart []io.Closer
|
||||
wg sync.WaitGroup
|
||||
func handleInterrupt(c console.Console) {
|
||||
sigchan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigchan, os.Interrupt)
|
||||
<-sigchan
|
||||
c.Reset()
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func (t *tty) waitConsole() error {
|
||||
if t.consoleC != nil {
|
||||
return <-t.consoleC
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClosePostStart closes any fds that are provided to the container and dup2'd
|
||||
|
@ -101,13 +140,17 @@ func (t *tty) Close() error {
|
|||
for _, c := range t.postStart {
|
||||
c.Close()
|
||||
}
|
||||
// wait for the copy routines to finish before closing the fds
|
||||
// the process is gone at this point, so shut down the console if we have
|
||||
// one and wait for all IO to be finished
|
||||
if t.console != nil && t.epoller != nil {
|
||||
t.console.Shutdown(t.epoller.CloseConsole)
|
||||
}
|
||||
t.wg.Wait()
|
||||
for _, c := range t.closers {
|
||||
c.Close()
|
||||
}
|
||||
if t.state != nil {
|
||||
term.RestoreTerminal(os.Stdin.Fd(), t.state)
|
||||
if t.stdin != nil {
|
||||
t.stdin.Reset()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -116,9 +159,5 @@ func (t *tty) resize() error {
|
|||
if t.console == nil {
|
||||
return nil
|
||||
}
|
||||
ws, err := term.GetWinsize(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return term.SetWinsize(t.console.Fd(), ws)
|
||||
return t.console.ResizeFrom(console.Current())
|
||||
}
|
||||
|
|
273
utils.go
273
utils.go
|
@ -2,74 +2,91 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"strconv"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/coreos/go-systemd/activation"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// startContainer starts the container. Returns the exit status or -1 and an
|
||||
// error. Signals sent to the current process will be forwarded to container.
|
||||
func startContainer(spec *specs.Spec, id, pidFile string, detach, useSystemdCgroup bool) (int, error) {
|
||||
// create the libcontainer config
|
||||
func startContainer(spec *specs.Spec, id, pidFile, consoleSocket, root string, detach bool) (int, error) {
|
||||
notifySocket := newNotifySocket(id, root)
|
||||
if notifySocket != nil {
|
||||
// Setup the spec for the notify socket.
|
||||
notifySocket.setupSpec(spec)
|
||||
}
|
||||
|
||||
// Create the libcontainer config.
|
||||
useSystemdCgroup := false
|
||||
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
||||
CgroupName: id,
|
||||
UseSystemdCgroup: useSystemdCgroup,
|
||||
NoPivotRoot: false,
|
||||
NoNewKeyring: false,
|
||||
Spec: spec,
|
||||
Rootless: true,
|
||||
})
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(config.Rootfs); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return -1, fmt.Errorf("rootfs (%q) does not exist", config.Rootfs)
|
||||
}
|
||||
return -1, err
|
||||
}
|
||||
|
||||
factory, err := loadFactory(useSystemdCgroup)
|
||||
// Load the factory.
|
||||
factory, err := loadFactory(root, useSystemdCgroup)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Create the container.
|
||||
container, err := factory.Create(id, config)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Support on-demand socket activation by passing file descriptors into the container init process.
|
||||
if notifySocket != nil {
|
||||
// Setup the socket for the notify socket.
|
||||
err := notifySocket.setupSocket()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
}
|
||||
|
||||
// Support on-demand socket activation by passing file descriptors into
|
||||
// the container init process.
|
||||
listenFDs := []*os.File{}
|
||||
if os.Getenv("LISTEN_FDS") != "" {
|
||||
listenFDs = activation.Files(false)
|
||||
}
|
||||
|
||||
// Initialize the runner.
|
||||
r := &runner{
|
||||
enableSubreaper: true,
|
||||
shouldDestroy: true,
|
||||
container: container,
|
||||
console: console,
|
||||
listenFDs: listenFDs,
|
||||
notifySocket: notifySocket,
|
||||
consoleSocket: consoleSocket,
|
||||
detach: detach,
|
||||
pidFile: pidFile,
|
||||
listenFDs: listenFDs,
|
||||
}
|
||||
return r.run(&spec.Process)
|
||||
// Run the process.
|
||||
return r.run(spec.Process)
|
||||
}
|
||||
|
||||
// loadFactory returns the configured factory instance for execing containers.
|
||||
func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) {
|
||||
abs, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
func loadFactory(root string, useSystemdCgroup bool) (libcontainer.Factory, error) {
|
||||
// Setup the cgroups manager. Default is cgroupfs.
|
||||
cgroupManager := libcontainer.Cgroupfs
|
||||
if useSystemdCgroup {
|
||||
if systemd.UseSystemd() {
|
||||
|
@ -78,25 +95,61 @@ func loadFactory(useSystemdCgroup bool) (libcontainer.Factory, error) {
|
|||
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
|
||||
}
|
||||
}
|
||||
return libcontainer.New(abs, cgroupManager, func(l *libcontainer.LinuxFactory) error {
|
||||
return nil
|
||||
})
|
||||
|
||||
// We resolve the paths for {newuidmap,newgidmap} from the context of runc,
|
||||
// to avoid doing a path lookup in the nsexec context. TODO: The binary
|
||||
// names are not currently configurable.
|
||||
newuidmap, err := exec.LookPath("newuidmap")
|
||||
if err != nil {
|
||||
newuidmap = ""
|
||||
}
|
||||
newgidmap, err := exec.LookPath("newgidmap")
|
||||
if err != nil {
|
||||
newgidmap = ""
|
||||
}
|
||||
|
||||
// Create the new libcontainer factory.
|
||||
return libcontainer.New(root, cgroupManager, nil, nil,
|
||||
libcontainer.NewuidmapPath(newuidmap),
|
||||
libcontainer.NewgidmapPath(newgidmap))
|
||||
}
|
||||
|
||||
// newProcess returns a new libcontainer Process with the arguments from the
|
||||
// spec and stdio from the current process.
|
||||
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
||||
// Create the libcontainer process.
|
||||
lp := &libcontainer.Process{
|
||||
Args: p.Args,
|
||||
Env: p.Env,
|
||||
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
||||
Args: p.Args,
|
||||
Env: p.Env,
|
||||
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
||||
Cwd: p.Cwd,
|
||||
Capabilities: p.Capabilities,
|
||||
Label: p.SelinuxLabel,
|
||||
NoNewPrivileges: &p.NoNewPrivileges,
|
||||
AppArmorProfile: p.ApparmorProfile,
|
||||
}
|
||||
|
||||
// Setup the console size.
|
||||
if p.ConsoleSize != nil {
|
||||
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
|
||||
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
|
||||
}
|
||||
|
||||
// Convert the capabilities.
|
||||
if p.Capabilities != nil {
|
||||
lp.Capabilities = &configs.Capabilities{}
|
||||
lp.Capabilities.Bounding = p.Capabilities.Bounding
|
||||
lp.Capabilities.Effective = p.Capabilities.Effective
|
||||
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
|
||||
lp.Capabilities.Permitted = p.Capabilities.Permitted
|
||||
lp.Capabilities.Ambient = p.Capabilities.Ambient
|
||||
}
|
||||
|
||||
// Setup the additional user groups.
|
||||
for _, gid := range p.User.AdditionalGids {
|
||||
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
|
||||
}
|
||||
|
||||
// Setup the Rlimits.
|
||||
for _, rlimit := range p.Rlimits {
|
||||
rl, err := createLibContainerRlimit(rlimit)
|
||||
if err != nil {
|
||||
|
@ -104,23 +157,8 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
|||
}
|
||||
lp.Rlimits = append(lp.Rlimits, rl)
|
||||
}
|
||||
return lp, nil
|
||||
}
|
||||
|
||||
func dupStdio(process *libcontainer.Process, rootuid int) error {
|
||||
process.Stdin = os.Stdin
|
||||
process.Stdout = os.Stdout
|
||||
process.Stderr = os.Stderr
|
||||
for _, fd := range []uintptr{
|
||||
os.Stdin.Fd(),
|
||||
os.Stdout.Fd(),
|
||||
os.Stderr.Fd(),
|
||||
} {
|
||||
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return lp, nil
|
||||
}
|
||||
|
||||
func destroy(container libcontainer.Container) {
|
||||
|
@ -129,24 +167,55 @@ func destroy(container libcontainer.Container) {
|
|||
}
|
||||
}
|
||||
|
||||
// setupIO sets the proper IO on the process depending on the configuration
|
||||
// If there is a nil error then there must be a non nil tty returned
|
||||
func setupIO(process *libcontainer.Process, rootuid int, console string, createTTY, detach bool) (*tty, error) {
|
||||
// detach and createTty will not work unless a console path is passed
|
||||
// so error out here before changing any terminal settings
|
||||
if createTTY && detach && console == "" {
|
||||
return nil, fmt.Errorf("cannot allocate tty if runc will detach")
|
||||
}
|
||||
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
|
||||
if createTTY {
|
||||
return createTty(process, rootuid, console)
|
||||
process.Stdin = nil
|
||||
process.Stdout = nil
|
||||
process.Stderr = nil
|
||||
t := &tty{}
|
||||
if !detach {
|
||||
parent, child, err := utils.NewSockPair("console")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
process.ConsoleSocket = child
|
||||
t.postStart = append(t.postStart, parent, child)
|
||||
t.consoleC = make(chan error, 1)
|
||||
go func() {
|
||||
if err := t.recvtty(process, parent); err != nil {
|
||||
t.consoleC <- err
|
||||
}
|
||||
t.consoleC <- nil
|
||||
}()
|
||||
} else {
|
||||
// the caller of runc will handle receiving the console master
|
||||
conn, err := net.Dial("unix", sockpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uc, ok := conn.(*net.UnixConn)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("casting to UnixConn failed")
|
||||
}
|
||||
t.postStart = append(t.postStart, uc)
|
||||
socket, err := uc.File()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.postStart = append(t.postStart, socket)
|
||||
process.ConsoleSocket = socket
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
|
||||
// and the container's process inherits runc's stdio.
|
||||
if detach {
|
||||
if err := dupStdio(process, rootuid); err != nil {
|
||||
if err := inheritStdio(process); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tty{}, nil
|
||||
}
|
||||
return createStdioPipes(process, rootuid)
|
||||
return setupProcessPipes(process, rootuid, rootgid)
|
||||
}
|
||||
|
||||
// createPidFile atomically creates a file containing the process's pid
|
||||
|
@ -175,46 +244,86 @@ func createPidFile(path string, process *libcontainer.Process) error {
|
|||
|
||||
type runner struct {
|
||||
enableSubreaper bool
|
||||
shouldDestroy bool
|
||||
detach bool
|
||||
listenFDs []*os.File
|
||||
shouldDestroy bool
|
||||
consoleSocket string
|
||||
pidFile string
|
||||
console string
|
||||
container libcontainer.Container
|
||||
listenFDs []*os.File
|
||||
notifySocket *notifySocket
|
||||
}
|
||||
|
||||
func (r *runner) run(config *specs.Process) (int, error) {
|
||||
// Check the terminal settings.
|
||||
if r.detach && config.Terminal && r.consoleSocket == "" {
|
||||
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
|
||||
}
|
||||
if (!r.detach || !config.Terminal) && r.consoleSocket != "" {
|
||||
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
|
||||
}
|
||||
|
||||
// Create the process.
|
||||
process, err := newProcess(*config)
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Setup the listen file descriptors.
|
||||
if len(r.listenFDs) > 0 {
|
||||
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
|
||||
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
||||
}
|
||||
rootuid, err := r.container.Config().HostUID()
|
||||
|
||||
// Get the rootuid.
|
||||
rootuid, err := r.container.Config().HostRootUID()
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
}
|
||||
tty, err := setupIO(process, rootuid, r.console, config.Terminal, r.detach)
|
||||
|
||||
// Get the rootgid.
|
||||
rootgid, err := r.container.Config().HostRootGID()
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
}
|
||||
handler := newSignalHandler(tty, r.enableSubreaper)
|
||||
if err := r.container.Start(process); err != nil {
|
||||
|
||||
// Setting up IO is a two stage process. We need to modify process to deal
|
||||
// with detaching containers, and then we get a tty after the container has
|
||||
// started.
|
||||
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
|
||||
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, r.detach, r.consoleSocket)
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
}
|
||||
defer tty.Close()
|
||||
|
||||
// Run the container.
|
||||
if err := r.container.Run(process); err != nil {
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
return -1, err
|
||||
}
|
||||
if err := tty.ClosePostStart(); err != nil {
|
||||
|
||||
// Wait for the tty.
|
||||
if err := tty.waitConsole(); err != nil {
|
||||
r.terminate(process)
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Close the tty's post-start resources.
|
||||
if err = tty.ClosePostStart(); err != nil {
|
||||
r.terminate(process)
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
return -1, err
|
||||
}
|
||||
|
||||
// Create the pid file.
|
||||
if r.pidFile != "" {
|
||||
if err := createPidFile(r.pidFile, process); err != nil {
|
||||
r.terminate(process)
|
||||
|
@ -223,16 +332,21 @@ func (r *runner) run(config *specs.Process) (int, error) {
|
|||
return -1, err
|
||||
}
|
||||
}
|
||||
if r.detach {
|
||||
tty.Close()
|
||||
return 0, nil
|
||||
}
|
||||
status, err := handler.forward(process)
|
||||
|
||||
// Forward signals to the process via the signal handler.
|
||||
status, err := handler.forward(process, tty, detach)
|
||||
if err != nil {
|
||||
r.terminate(process)
|
||||
}
|
||||
|
||||
// Return early if we are detaching.
|
||||
if r.detach {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Cleanup.
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
|
||||
return status, err
|
||||
}
|
||||
|
||||
|
@ -243,27 +357,18 @@ func (r *runner) destroy() {
|
|||
}
|
||||
|
||||
func (r *runner) terminate(p *libcontainer.Process) {
|
||||
p.Signal(syscall.SIGKILL)
|
||||
p.Wait()
|
||||
_ = p.Signal(unix.SIGKILL)
|
||||
_, _ = p.Wait()
|
||||
}
|
||||
|
||||
// sPtr returns a pointer to a freshly allocated copy of s, which is handy
// for filling optional *string fields from a literal.
func sPtr(s string) *string {
	out := new(string)
	*out = s
	return out
}
|
||||
|
||||
func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
|
||||
func createLibContainerRlimit(rlimit specs.POSIXRlimit) (configs.Rlimit, error) {
|
||||
rl, err := strToRlimit(rlimit.Type)
|
||||
if err != nil {
|
||||
return configs.Rlimit{}, err
|
||||
}
|
||||
return configs.Rlimit{
|
||||
Type: rl,
|
||||
Hard: uint64(rlimit.Hard),
|
||||
Soft: uint64(rlimit.Soft),
|
||||
Hard: rlimit.Hard,
|
||||
Soft: rlimit.Soft,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// setupSdNotify adds support for the sd_notify protocol from within the
// container; it is meant to be used when systemd supports sd_notify.
|
||||
// It appends a bind mount of notifySocket (same path as source and
// destination) to spec.Mounts, and appends NOTIFY_SOCKET=<notifySocket> to
// the environment in spec.Process.Env. The spec is modified in place.
|
||||
func setupSdNotify(spec *specs.Spec, notifySocket string) {
|
||||
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
|
||||
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue