2017-10-10 20:33:20 +00:00
|
|
|
package libpod
|
|
|
|
|
|
|
|
import (
|
2017-10-20 19:26:08 +00:00
|
|
|
"bytes"
|
2017-10-10 20:33:20 +00:00
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
"path/filepath"
|
|
|
|
"syscall"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/containerd/cgroups"
|
|
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"golang.org/x/sys/unix"
|
|
|
|
|
|
|
|
// TODO import these functions into libpod and remove the import
|
|
|
|
// Trying to keep libpod from depending on CRI-O code
|
|
|
|
"github.com/kubernetes-incubator/cri-o/utils"
|
|
|
|
)
|
|
|
|
|
|
|
|
// OCI code is undergoing heavy rewrite
|
|
|
|
|
|
|
|
const (
	// CgroupfsCgroupsManager is the name of the cgroupfs native cgroup manager.
	CgroupfsCgroupsManager = "cgroupfs"
	// SystemdCgroupsManager is the name of the systemd native cgroup manager.
	SystemdCgroupsManager = "systemd"

	// ContainerCreateTimeout is the container creation timeout.
	ContainerCreateTimeout = 4 * time.Minute
)
|
|
|
|
|
|
|
|
// OCIRuntime describes an OCI-compatible runtime, together with the conmon
// settings libpod uses when calling into it to perform container operations.
type OCIRuntime struct {
	// name is the runtime name supplied to newOCIRuntime.
	name string
	// path is the runtime executable; it is handed to conmon via "-r" and
	// invoked for "start".
	path string
	// conmonPath is the conmon executable launched by createContainer.
	conmonPath string
	// conmonEnv is the base environment passed to conmon.
	conmonEnv []string
	// cgroupManager is CgroupfsCgroupsManager or SystemdCgroupsManager.
	cgroupManager string
	// tmpDir is the parent of exitsDir and socketsDir.
	tmpDir string
	// exitsDir is passed to conmon as "--exit-dir".
	exitsDir string
	// socketsDir is passed to conmon as "--socket-dir-path".
	socketsDir string
	// logSizeMax is passed to conmon as "--log-size-max" when it is >= 0.
	logSizeMax int64
	// noPivot makes createContainer pass "--no-pivot" to conmon.
	noPivot bool
}
|
|
|
|
|
|
|
|
// syncInfo is the JSON message the monitor process sends back to the daemon
// over the sync pipe.
type syncInfo struct {
	// Pid is the reported container PID; createContainer treats -1 as a
	// failed create.
	Pid int `json:"pid"`
	// Message optionally carries the failure reason.
	Message string `json:"message,omitempty"`
}
|
|
|
|
|
|
|
|
// Make a new OCI runtime with provided options
|
2017-10-23 15:36:10 +00:00
|
|
|
func newOCIRuntime(name string, path string, conmonPath string, conmonEnv []string, cgroupManager string, tmpDir string, logSizeMax int64, noPivotRoot bool) (*OCIRuntime, error) {
|
2017-10-10 20:33:20 +00:00
|
|
|
runtime := new(OCIRuntime)
|
|
|
|
runtime.name = name
|
|
|
|
runtime.path = path
|
|
|
|
runtime.conmonPath = conmonPath
|
|
|
|
runtime.conmonEnv = conmonEnv
|
|
|
|
runtime.cgroupManager = cgroupManager
|
2017-10-23 15:36:10 +00:00
|
|
|
runtime.tmpDir = tmpDir
|
2017-10-10 20:33:20 +00:00
|
|
|
runtime.logSizeMax = logSizeMax
|
|
|
|
runtime.noPivot = noPivotRoot
|
|
|
|
|
2017-10-23 15:36:10 +00:00
|
|
|
runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
|
|
|
|
runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket")
|
|
|
|
|
2017-10-10 20:33:20 +00:00
|
|
|
if cgroupManager != CgroupfsCgroupsManager && cgroupManager != SystemdCgroupsManager {
|
|
|
|
return nil, errors.Wrapf(ErrInvalidArg, "invalid cgroup manager specified: %s", cgroupManager)
|
|
|
|
}
|
|
|
|
|
2017-10-23 15:36:10 +00:00
|
|
|
// Create the exit files and attach sockets directories
|
|
|
|
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
|
|
|
|
// The directory is allowed to exist
|
|
|
|
if !os.IsExist(err) {
|
|
|
|
return nil, errors.Wrapf(err, "error creating OCI runtime exit files directory %s",
|
|
|
|
runtime.exitsDir)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := os.MkdirAll(runtime.socketsDir, 0750); err != nil {
|
|
|
|
// The directory is allowed to exist
|
|
|
|
if !os.IsExist(err) {
|
|
|
|
return nil, errors.Wrapf(err, "error creating OCI runtime attach sockets directory %s",
|
|
|
|
runtime.socketsDir)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-10 20:33:20 +00:00
|
|
|
return runtime, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// newPipe creates a unix socket pair for communication
|
|
|
|
func newPipe() (parent *os.File, child *os.File, err error) {
|
|
|
|
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// createUnitName builds the systemd scope unit name used for cgroup scopes,
// in the form "<prefix>-<name>.scope".
func createUnitName(prefix string, name string) string {
	unit := fmt.Sprint(prefix, "-", name, ".scope")
	return unit
}
|
|
|
|
|
|
|
|
// CreateContainer creates a container in the OCI runtime
|
|
|
|
// TODO terminal support for container
|
|
|
|
// Presently just ignoring conmon opts related to it
|
|
|
|
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error {
|
2017-10-20 19:26:08 +00:00
|
|
|
var stderrBuf bytes.Buffer
|
|
|
|
|
2017-10-10 20:33:20 +00:00
|
|
|
parentPipe, childPipe, err := newPipe()
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrapf(err, "error creating socket pair")
|
|
|
|
}
|
|
|
|
|
|
|
|
childStartPipe, parentStartPipe, err := newPipe()
|
|
|
|
if err != nil {
|
2017-10-25 16:04:52 +00:00
|
|
|
return errors.Wrapf(err, "error creating socket pair for start pipe")
|
2017-10-10 20:33:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
defer parentPipe.Close()
|
|
|
|
defer parentStartPipe.Close()
|
|
|
|
|
|
|
|
args := []string{}
|
|
|
|
if r.cgroupManager == SystemdCgroupsManager {
|
|
|
|
args = append(args, "-s")
|
|
|
|
}
|
|
|
|
args = append(args, "-c", ctr.ID())
|
|
|
|
args = append(args, "-u", ctr.ID())
|
|
|
|
args = append(args, "-r", r.path)
|
2017-10-23 18:21:15 +00:00
|
|
|
args = append(args, "-b", ctr.bundlePath())
|
2017-10-20 19:26:08 +00:00
|
|
|
args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile"))
|
2017-10-10 20:33:20 +00:00
|
|
|
// TODO container log location should be configurable
|
|
|
|
// The default also likely shouldn't be this
|
2017-10-20 19:26:08 +00:00
|
|
|
args = append(args, "-l", filepath.Join(ctr.config.StaticDir, "ctr.log"))
|
2017-10-10 20:33:20 +00:00
|
|
|
args = append(args, "--exit-dir", r.exitsDir)
|
2017-10-23 15:36:10 +00:00
|
|
|
args = append(args, "--socket-dir-path", r.socketsDir)
|
2017-10-20 19:26:08 +00:00
|
|
|
if ctr.config.Spec.Process.Terminal {
|
|
|
|
args = append(args, "-t")
|
|
|
|
} else if ctr.config.Stdin {
|
|
|
|
args = append(args, "-i")
|
|
|
|
}
|
2017-10-10 20:33:20 +00:00
|
|
|
if r.logSizeMax >= 0 {
|
|
|
|
args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax))
|
|
|
|
}
|
|
|
|
if r.noPivot {
|
|
|
|
args = append(args, "--no-pivot")
|
|
|
|
}
|
|
|
|
logrus.WithFields(logrus.Fields{
|
|
|
|
"args": args,
|
|
|
|
}).Debugf("running conmon: %s", r.conmonPath)
|
|
|
|
|
|
|
|
cmd := exec.Command(r.conmonPath, args...)
|
2017-10-20 19:26:08 +00:00
|
|
|
cmd.Dir = ctr.state.RunDir
|
2017-10-10 20:33:20 +00:00
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
Setpgid: true,
|
|
|
|
}
|
|
|
|
// TODO this is probably a really bad idea for some uses
|
|
|
|
// Make this configurable
|
|
|
|
cmd.Stdin = os.Stdin
|
|
|
|
cmd.Stdout = os.Stdout
|
|
|
|
cmd.Stderr = os.Stderr
|
2017-10-20 19:26:08 +00:00
|
|
|
if ctr.config.Spec.Process.Terminal {
|
|
|
|
cmd.Stderr = &stderrBuf
|
|
|
|
}
|
2017-10-10 20:33:20 +00:00
|
|
|
|
|
|
|
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
|
|
|
|
// 0, 1 and 2 are stdin, stdout and stderr
|
|
|
|
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
|
|
|
|
cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
|
|
|
|
|
|
|
|
err = cmd.Start()
|
|
|
|
if err != nil {
|
|
|
|
childPipe.Close()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// We don't need childPipe on the parent side
|
|
|
|
childPipe.Close()
|
|
|
|
childStartPipe.Close()
|
|
|
|
|
|
|
|
// Move conmon to specified cgroup
|
|
|
|
if r.cgroupManager == SystemdCgroupsManager {
|
|
|
|
logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("libpod-conmon", ctr.ID()))
|
|
|
|
if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("libpod-conmon", ctr.ID())); err != nil {
|
|
|
|
logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/libpod-conmon-"+ctr.ID())), &spec.LinuxResources{})
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
|
|
|
|
} else {
|
|
|
|
// XXX: this defer does nothing as the cgroup can't be deleted cause
|
|
|
|
// it contains the conmon pid in tasks
|
|
|
|
// we need to remove this defer and delete the cgroup once conmon exits
|
|
|
|
// maybe need a conmon monitor?
|
|
|
|
defer control.Delete()
|
|
|
|
if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil {
|
|
|
|
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We set the cgroup, now the child can start creating children */
|
|
|
|
someData := []byte{0}
|
|
|
|
_, err = parentStartPipe.Write(someData)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wait for initial setup and fork, and reap child */
|
|
|
|
err = cmd.Wait()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO should do a defer r.deleteContainer(ctr) here if err != nil
|
|
|
|
// Need deleteContainer to be working first, though...
|
|
|
|
|
|
|
|
// Wait to get container pid from conmon
|
|
|
|
type syncStruct struct {
|
|
|
|
si *syncInfo
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
ch := make(chan syncStruct)
|
|
|
|
go func() {
|
|
|
|
var si *syncInfo
|
|
|
|
if err = json.NewDecoder(parentPipe).Decode(&si); err != nil {
|
|
|
|
ch <- syncStruct{err: err}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
ch <- syncStruct{si: si}
|
|
|
|
}()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case ss := <-ch:
|
|
|
|
if ss.err != nil {
|
|
|
|
return errors.Wrapf(ss.err, "error reading container (probably exited) json message")
|
|
|
|
}
|
|
|
|
logrus.Debugf("Received container pid: %d", ss.si.Pid)
|
|
|
|
if ss.si.Pid == -1 {
|
|
|
|
if ss.si.Message != "" {
|
|
|
|
return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message)
|
|
|
|
}
|
|
|
|
return errors.Wrapf(ErrInternal, "container create failed")
|
|
|
|
}
|
|
|
|
case <-time.After(ContainerCreateTimeout):
|
|
|
|
return errors.Wrapf(ErrInternal, "container creation timeout")
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateContainerStatus retrieves the current status of the container from the
|
|
|
|
// runtime
|
|
|
|
func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
|
|
|
|
return ErrNotImplemented
|
|
|
|
}
|
|
|
|
|
|
|
|
// startContainer starts the given container
|
|
|
|
func (r *OCIRuntime) startContainer(ctr *Container) error {
|
|
|
|
// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
|
|
|
|
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", ctr.ID()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO record start time in container struct
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|