360 lines
9.2 KiB
Go
360 lines
9.2 KiB
Go
|
// +build linux
|
||
|
|
||
|
package main
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"net"
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
"strconv"
|
||
|
"syscall"
|
||
|
|
||
|
"github.com/Sirupsen/logrus"
|
||
|
"github.com/coreos/go-systemd/activation"
|
||
|
"github.com/opencontainers/runc/libcontainer"
|
||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||
|
"github.com/urfave/cli"
|
||
|
)
|
||
|
|
||
|
// errEmptyID is the error returned when the first command-line argument,
// which is used as the container id, is empty.
var errEmptyID = errors.New("container id cannot be empty")
|
||
|
|
||
|
// container is a package-level libcontainer.Container handle.
// NOTE(review): nothing in this part of the file reads or assigns it —
// confirm whether it is still needed.
var container libcontainer.Container
|
||
|
|
||
|
// loadFactory returns the configured factory instance for execing containers.
|
||
|
func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
||
|
root := context.GlobalString("root")
|
||
|
abs, err := filepath.Abs(root)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
cgroupManager := libcontainer.Cgroupfs
|
||
|
if context.GlobalBool("systemd-cgroup") {
|
||
|
if systemd.UseSystemd() {
|
||
|
cgroupManager = libcontainer.SystemdCgroups
|
||
|
} else {
|
||
|
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
|
||
|
}
|
||
|
}
|
||
|
return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu")))
|
||
|
}
|
||
|
|
||
|
// getContainer returns the specified container instance by loading it from state
|
||
|
// with the default factory.
|
||
|
func getContainer(context *cli.Context) (libcontainer.Container, error) {
|
||
|
id := context.Args().First()
|
||
|
if id == "" {
|
||
|
return nil, errEmptyID
|
||
|
}
|
||
|
factory, err := loadFactory(context)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return factory.Load(id)
|
||
|
}
|
||
|
|
||
|
func fatalf(t string, v ...interface{}) {
|
||
|
fatal(fmt.Errorf(t, v...))
|
||
|
}
|
||
|
|
||
|
func getDefaultImagePath(context *cli.Context) string {
|
||
|
cwd, err := os.Getwd()
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
return filepath.Join(cwd, "checkpoint")
|
||
|
}
|
||
|
|
||
|
// newProcess returns a new libcontainer Process with the arguments from the
|
||
|
// spec and stdio from the current process.
|
||
|
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
||
|
lp := &libcontainer.Process{
|
||
|
Args: p.Args,
|
||
|
Env: p.Env,
|
||
|
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
||
|
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
||
|
Cwd: p.Cwd,
|
||
|
Capabilities: p.Capabilities,
|
||
|
Label: p.SelinuxLabel,
|
||
|
NoNewPrivileges: &p.NoNewPrivileges,
|
||
|
AppArmorProfile: p.ApparmorProfile,
|
||
|
}
|
||
|
for _, gid := range p.User.AdditionalGids {
|
||
|
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
|
||
|
}
|
||
|
for _, rlimit := range p.Rlimits {
|
||
|
rl, err := createLibContainerRlimit(rlimit)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
lp.Rlimits = append(lp.Rlimits, rl)
|
||
|
}
|
||
|
return lp, nil
|
||
|
}
|
||
|
|
||
|
// If systemd is supporting sd_notify protocol, this function will add support
|
||
|
// for sd_notify protocol from within the container.
|
||
|
func setupSdNotify(spec *specs.Spec, notifySocket string) {
|
||
|
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
|
||
|
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
|
||
|
}
|
||
|
|
||
|
func destroy(container libcontainer.Container) {
|
||
|
if err := container.Destroy(); err != nil {
|
||
|
logrus.Error(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// setupIO modifies the given process config according to the options.
|
||
|
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool) (*tty, error) {
|
||
|
// This is entirely handled by recvtty.
|
||
|
if createTTY {
|
||
|
process.Stdin = nil
|
||
|
process.Stdout = nil
|
||
|
process.Stderr = nil
|
||
|
return &tty{}, nil
|
||
|
}
|
||
|
|
||
|
// When we detach, we just dup over stdio and call it a day. There's no
|
||
|
// requirement that we set up anything nice for our caller or the
|
||
|
// container.
|
||
|
if detach {
|
||
|
if err := dupStdio(process, rootuid, rootgid); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return &tty{}, nil
|
||
|
}
|
||
|
|
||
|
// XXX: This doesn't sit right with me. It's ugly.
|
||
|
return createStdioPipes(process, rootuid, rootgid)
|
||
|
}
|
||
|
|
||
|
// createPidFile creates a file with the processes pid inside it atomically
|
||
|
// it creates a temp file with the paths filename + '.' infront of it
|
||
|
// then renames the file
|
||
|
func createPidFile(path string, process *libcontainer.Process) error {
|
||
|
pid, err := process.Pid()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
var (
|
||
|
tmpDir = filepath.Dir(path)
|
||
|
tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path)))
|
||
|
)
|
||
|
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
_, err = fmt.Fprintf(f, "%d", pid)
|
||
|
f.Close()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
return os.Rename(tmpName, path)
|
||
|
}
|
||
|
|
||
|
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
|
||
|
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
||
|
CgroupName: id,
|
||
|
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
|
||
|
NoPivotRoot: context.Bool("no-pivot"),
|
||
|
NoNewKeyring: context.Bool("no-new-keyring"),
|
||
|
Spec: spec,
|
||
|
})
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
factory, err := loadFactory(context)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return factory.Create(id, config)
|
||
|
}
|
||
|
|
||
|
// runner bundles a container with the options that control how run starts
// a process in it and cleans up afterwards.
type runner struct {
	// enableSubreaper is passed to newSignalHandler by run.
	enableSubreaper bool
	// shouldDestroy gates (*runner).destroy: the container is only destroyed
	// on cleanup when this is true.
	shouldDestroy bool
	// detach makes run return right after the process has started instead of
	// forwarding signals and waiting for it.
	detach bool
	// listenFDs are appended to the process's ExtraFiles; when non-empty,
	// LISTEN_FDS and LISTEN_PID=1 are added to its environment.
	listenFDs []*os.File
	// pidFile, when non-empty, is the path run writes the process pid to.
	pidFile string
	// consoleSocket is the path of the unix socket the tty is sent over when
	// the process has a terminal and run detaches.
	consoleSocket string
	// container is the container the process is started in.
	container libcontainer.Container
	// create selects container.Start instead of container.Run and, like
	// detach, makes run return once the process has started.
	create bool
}
|
||
|
|
||
|
func (r *runner) terminalinfo() *libcontainer.TerminalInfo {
|
||
|
return libcontainer.NewTerminalInfo(r.container.ID())
|
||
|
}
|
||
|
|
||
|
func (r *runner) run(config *specs.Process) (int, error) {
|
||
|
process, err := newProcess(*config)
|
||
|
if err != nil {
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
if len(r.listenFDs) > 0 {
|
||
|
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
|
||
|
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
||
|
}
|
||
|
|
||
|
rootuid, err := r.container.Config().HostUID()
|
||
|
if err != nil {
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
|
||
|
rootgid, err := r.container.Config().HostGID()
|
||
|
if err != nil {
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
|
||
|
detach := r.detach || r.create
|
||
|
|
||
|
// Check command-line for sanity.
|
||
|
if detach && config.Terminal && r.consoleSocket == "" {
|
||
|
r.destroy()
|
||
|
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
|
||
|
}
|
||
|
// XXX: Should we change this?
|
||
|
if (!detach || !config.Terminal) && r.consoleSocket != "" {
|
||
|
r.destroy()
|
||
|
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
|
||
|
}
|
||
|
|
||
|
startFn := r.container.Start
|
||
|
if !r.create {
|
||
|
startFn = r.container.Run
|
||
|
}
|
||
|
// Setting up IO is a two stage process. We need to modify process to deal
|
||
|
// with detaching containers, and then we get a tty after the container has
|
||
|
// started.
|
||
|
handler := newSignalHandler(r.enableSubreaper)
|
||
|
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach)
|
||
|
if err != nil {
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
defer tty.Close()
|
||
|
if err = startFn(process); err != nil {
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
if config.Terminal {
|
||
|
if err = tty.recvtty(process, r.detach || r.create); err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if config.Terminal && detach {
|
||
|
conn, err := net.Dial("unix", r.consoleSocket)
|
||
|
if err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
defer conn.Close()
|
||
|
|
||
|
unixconn, ok := conn.(*net.UnixConn)
|
||
|
if !ok {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, fmt.Errorf("casting to UnixConn failed")
|
||
|
}
|
||
|
|
||
|
socket, err := unixconn.File()
|
||
|
if err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
defer socket.Close()
|
||
|
|
||
|
err = tty.sendtty(socket, r.terminalinfo())
|
||
|
if err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if err = tty.ClosePostStart(); err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
if r.pidFile != "" {
|
||
|
if err = createPidFile(r.pidFile, process); err != nil {
|
||
|
r.terminate(process)
|
||
|
r.destroy()
|
||
|
return -1, err
|
||
|
}
|
||
|
}
|
||
|
if detach {
|
||
|
return 0, nil
|
||
|
}
|
||
|
status, err := handler.forward(process, tty)
|
||
|
if err != nil {
|
||
|
r.terminate(process)
|
||
|
}
|
||
|
r.destroy()
|
||
|
return status, err
|
||
|
}
|
||
|
|
||
|
func (r *runner) destroy() {
|
||
|
if r.shouldDestroy {
|
||
|
destroy(r.container)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (r *runner) terminate(p *libcontainer.Process) {
|
||
|
_ = p.Signal(syscall.SIGKILL)
|
||
|
_, _ = p.Wait()
|
||
|
}
|
||
|
|
||
|
func validateProcessSpec(spec *specs.Process) error {
|
||
|
if spec.Cwd == "" {
|
||
|
return fmt.Errorf("Cwd property must not be empty")
|
||
|
}
|
||
|
if !filepath.IsAbs(spec.Cwd) {
|
||
|
return fmt.Errorf("Cwd must be an absolute path")
|
||
|
}
|
||
|
if len(spec.Args) == 0 {
|
||
|
return fmt.Errorf("args must not be empty")
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, error) {
|
||
|
id := context.Args().First()
|
||
|
if id == "" {
|
||
|
return -1, errEmptyID
|
||
|
}
|
||
|
container, err := createContainer(context, id, spec)
|
||
|
if err != nil {
|
||
|
return -1, err
|
||
|
}
|
||
|
// Support on-demand socket activation by passing file descriptors into the container init process.
|
||
|
listenFDs := []*os.File{}
|
||
|
if os.Getenv("LISTEN_FDS") != "" {
|
||
|
listenFDs = activation.Files(false)
|
||
|
}
|
||
|
r := &runner{
|
||
|
enableSubreaper: !context.Bool("no-subreaper"),
|
||
|
shouldDestroy: true,
|
||
|
container: container,
|
||
|
listenFDs: listenFDs,
|
||
|
consoleSocket: context.String("console-socket"),
|
||
|
detach: context.Bool("detach"),
|
||
|
pidFile: context.String("pid-file"),
|
||
|
create: create,
|
||
|
}
|
||
|
return r.run(&spec.Process)
|
||
|
}
|