e346c2e0ba
Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
374 lines
9.5 KiB
Go
374 lines
9.5 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strconv"
|
|
|
|
"github.com/coreos/go-systemd/activation"
|
|
"github.com/opencontainers/runc/libcontainer"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
|
"github.com/opencontainers/runc/libcontainer/utils"
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// startContainer starts the container. Returns the exit status or -1 and an
|
|
// error. Signals sent to the current process will be forwarded to container.
|
|
func startContainer(spec *specs.Spec, id, pidFile, consoleSocket, root string, detach bool) (int, error) {
|
|
notifySocket := newNotifySocket(id, root)
|
|
if notifySocket != nil {
|
|
// Setup the spec for the notify socket.
|
|
notifySocket.setupSpec(spec)
|
|
}
|
|
|
|
// Create the libcontainer config.
|
|
useSystemdCgroup := false
|
|
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
|
CgroupName: id,
|
|
UseSystemdCgroup: useSystemdCgroup,
|
|
NoPivotRoot: false,
|
|
NoNewKeyring: false,
|
|
Spec: spec,
|
|
Rootless: true,
|
|
})
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
// Load the factory.
|
|
factory, err := loadFactory(root, useSystemdCgroup)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
// Create the factory.
|
|
container, err := factory.Create(id, config)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
if notifySocket != nil {
|
|
// Setup the socket for the notify socket.
|
|
err := notifySocket.setupSocket()
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
}
|
|
|
|
// Support on-demand socket activation by passing file descriptors into
|
|
// the container init process.
|
|
listenFDs := []*os.File{}
|
|
if os.Getenv("LISTEN_FDS") != "" {
|
|
listenFDs = activation.Files(false)
|
|
}
|
|
|
|
// Initialize the runner.
|
|
r := &runner{
|
|
enableSubreaper: true,
|
|
shouldDestroy: true,
|
|
container: container,
|
|
listenFDs: listenFDs,
|
|
notifySocket: notifySocket,
|
|
consoleSocket: consoleSocket,
|
|
detach: detach,
|
|
pidFile: pidFile,
|
|
}
|
|
// Run the process.
|
|
return r.run(spec.Process)
|
|
}
|
|
|
|
// loadFactory returns the configured factory instance for execing containers.
|
|
func loadFactory(root string, useSystemdCgroup bool) (libcontainer.Factory, error) {
|
|
// Setup the cgroups manager. Default is cgroupfs.
|
|
cgroupManager := libcontainer.Cgroupfs
|
|
if useSystemdCgroup {
|
|
if systemd.UseSystemd() {
|
|
cgroupManager = libcontainer.SystemdCgroups
|
|
} else {
|
|
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
|
|
}
|
|
}
|
|
|
|
// We resolve the paths for {newuidmap,newgidmap} from the context of runc,
|
|
// to avoid doing a path lookup in the nsexec context. TODO: The binary
|
|
// names are not currently configurable.
|
|
newuidmap, err := exec.LookPath("newuidmap")
|
|
if err != nil {
|
|
newuidmap = ""
|
|
}
|
|
newgidmap, err := exec.LookPath("newgidmap")
|
|
if err != nil {
|
|
newgidmap = ""
|
|
}
|
|
|
|
// Create the new libcontainer factory.
|
|
return libcontainer.New(root, cgroupManager, nil, nil,
|
|
libcontainer.NewuidmapPath(newuidmap),
|
|
libcontainer.NewgidmapPath(newgidmap))
|
|
}
|
|
|
|
// newProcess returns a new libcontainer Process with the arguments from the
|
|
// spec and stdio from the current process.
|
|
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
|
// Create the libcontainer process.
|
|
lp := &libcontainer.Process{
|
|
Args: p.Args,
|
|
Env: p.Env,
|
|
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
|
Cwd: p.Cwd,
|
|
Label: p.SelinuxLabel,
|
|
NoNewPrivileges: &p.NoNewPrivileges,
|
|
AppArmorProfile: p.ApparmorProfile,
|
|
}
|
|
|
|
// Setup the console size.
|
|
if p.ConsoleSize != nil {
|
|
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
|
|
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
|
|
}
|
|
|
|
// Convert the capabilities.
|
|
if p.Capabilities != nil {
|
|
lp.Capabilities = &configs.Capabilities{}
|
|
lp.Capabilities.Bounding = p.Capabilities.Bounding
|
|
lp.Capabilities.Effective = p.Capabilities.Effective
|
|
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
|
|
lp.Capabilities.Permitted = p.Capabilities.Permitted
|
|
lp.Capabilities.Ambient = p.Capabilities.Ambient
|
|
}
|
|
|
|
// Setup the additional user groups.
|
|
for _, gid := range p.User.AdditionalGids {
|
|
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
|
|
}
|
|
|
|
// Setup the Rlimits.
|
|
for _, rlimit := range p.Rlimits {
|
|
rl, err := createLibContainerRlimit(rlimit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lp.Rlimits = append(lp.Rlimits, rl)
|
|
}
|
|
|
|
return lp, nil
|
|
}
|
|
|
|
func destroy(container libcontainer.Container) {
|
|
if err := container.Destroy(); err != nil {
|
|
logrus.Error(err)
|
|
}
|
|
}
|
|
|
|
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
|
|
if createTTY {
|
|
process.Stdin = nil
|
|
process.Stdout = nil
|
|
process.Stderr = nil
|
|
t := &tty{}
|
|
if !detach {
|
|
parent, child, err := utils.NewSockPair("console")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
process.ConsoleSocket = child
|
|
t.postStart = append(t.postStart, parent, child)
|
|
t.consoleC = make(chan error, 1)
|
|
go func() {
|
|
if err := t.recvtty(process, parent); err != nil {
|
|
t.consoleC <- err
|
|
}
|
|
t.consoleC <- nil
|
|
}()
|
|
} else {
|
|
// the caller of runc will handle receiving the console master
|
|
conn, err := net.Dial("unix", sockpath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
uc, ok := conn.(*net.UnixConn)
|
|
if !ok {
|
|
return nil, fmt.Errorf("casting to UnixConn failed")
|
|
}
|
|
t.postStart = append(t.postStart, uc)
|
|
socket, err := uc.File()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
t.postStart = append(t.postStart, socket)
|
|
process.ConsoleSocket = socket
|
|
}
|
|
return t, nil
|
|
}
|
|
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
|
|
// and the container's process inherits runc's stdio.
|
|
if detach {
|
|
if err := inheritStdio(process); err != nil {
|
|
return nil, err
|
|
}
|
|
return &tty{}, nil
|
|
}
|
|
return setupProcessPipes(process, rootuid, rootgid)
|
|
}
|
|
|
|
// createPidFile creates a file with the processes pid inside it atomically
|
|
// it creates a temp file with the paths filename + '.' infront of it
|
|
// then renames the file
|
|
func createPidFile(path string, process *libcontainer.Process) error {
|
|
pid, err := process.Pid()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var (
|
|
tmpDir = filepath.Dir(path)
|
|
tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path)))
|
|
)
|
|
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = fmt.Fprintf(f, "%d", pid)
|
|
f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.Rename(tmpName, path)
|
|
}
|
|
|
|
type runner struct {
|
|
enableSubreaper bool
|
|
detach bool
|
|
shouldDestroy bool
|
|
consoleSocket string
|
|
pidFile string
|
|
container libcontainer.Container
|
|
listenFDs []*os.File
|
|
notifySocket *notifySocket
|
|
}
|
|
|
|
func (r *runner) run(config *specs.Process) (int, error) {
|
|
// Check the terminal settings.
|
|
if r.detach && config.Terminal && r.consoleSocket == "" {
|
|
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
|
|
}
|
|
if (!r.detach || !config.Terminal) && r.consoleSocket != "" {
|
|
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
|
|
}
|
|
|
|
// Create the process.
|
|
process, err := newProcess(*config)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
|
|
// Setup the listen file descriptors.
|
|
if len(r.listenFDs) > 0 {
|
|
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
|
|
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
|
}
|
|
|
|
// Get the rootuid.
|
|
rootuid, err := r.container.Config().HostRootUID()
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
|
|
// Get the rootgid.
|
|
rootgid, err := r.container.Config().HostRootGID()
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
|
|
// Setting up IO is a two stage process. We need to modify process to deal
|
|
// with detaching containers, and then we get a tty after the container has
|
|
// started.
|
|
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
|
|
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, r.detach, r.consoleSocket)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
defer tty.Close()
|
|
|
|
// Run the container.
|
|
if err := r.container.Run(process); err != nil {
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
|
|
// Wait for the tty.
|
|
if err := tty.waitConsole(); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
|
|
// Close after start the tty.
|
|
if err = tty.ClosePostStart(); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
|
|
// Create the pid file.
|
|
if r.pidFile != "" {
|
|
if err := createPidFile(r.pidFile, process); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
}
|
|
|
|
// Forward the handler.
|
|
status, err := handler.forward(process, tty, detach)
|
|
if err != nil {
|
|
r.terminate(process)
|
|
}
|
|
|
|
// Return early if we are detaching.
|
|
if r.detach {
|
|
return 0, nil
|
|
}
|
|
|
|
// Cleanup.
|
|
r.destroy()
|
|
|
|
return status, err
|
|
}
|
|
|
|
func (r *runner) destroy() {
|
|
if r.shouldDestroy {
|
|
destroy(r.container)
|
|
}
|
|
}
|
|
|
|
func (r *runner) terminate(p *libcontainer.Process) {
|
|
_ = p.Signal(unix.SIGKILL)
|
|
_, _ = p.Wait()
|
|
}
|
|
|
|
func createLibContainerRlimit(rlimit specs.POSIXRlimit) (configs.Rlimit, error) {
|
|
rl, err := strToRlimit(rlimit.Type)
|
|
if err != nil {
|
|
return configs.Rlimit{}, err
|
|
}
|
|
return configs.Rlimit{
|
|
Type: rl,
|
|
Hard: rlimit.Hard,
|
|
Soft: rlimit.Soft,
|
|
}, nil
|
|
}
|