From 241653e152c52e9ab3772fc5638b42a9a2c93ea7 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 10 Oct 2017 16:33:20 -0400 Subject: [PATCH] Add container creation logic to Libpod Signed-off-by: Matthew Heon --- libpod/container.go | 249 ++++++++++++++++++++++++++++++++++----- libpod/errors.go | 4 + libpod/oci.go | 240 ++++++++++++++++++++++++++++++++++++++ libpod/options.go | 82 ++++++++++++- libpod/pod.go | 12 +- libpod/runtime.go | 53 +++++++-- libpod/runtime_ctr.go | 21 +++- libpod/storage.go | 263 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 873 insertions(+), 51 deletions(-) create mode 100644 libpod/oci.go create mode 100644 libpod/storage.go diff --git a/libpod/container.go b/libpod/container.go index f4df9a0e..522e1f7a 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -1,52 +1,121 @@ package libpod import ( + "encoding/json" + "io/ioutil" + "path/filepath" "sync" "github.com/containers/storage" "github.com/docker/docker/pkg/stringid" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "github.com/ulule/deepcopier" ) +// ContainerState represents the current state of a container +type ContainerState int + +const ( + // ContainerStateUnknown indicates that the container is in an error + // state where information about it cannot be retrieved + ContainerStateUnknown ContainerState = iota + // ContainerStateConfigured indicates that the container has had its + // storage configured but it has not been created in the OCI runtime + ContainerStateConfigured ContainerState = iota + // ContainerStateCreated indicates the container has been created in + // the OCI runtime but not started + ContainerStateCreated ContainerState = iota + // ContainerStateRunning indicates the container is currently executing + ContainerStateRunning ContainerState = iota + // ContainerStateStopped indicates that the container was running but has + // exited + ContainerStateStopped ContainerState = iota + // ContainerStatePaused indicates that the container has been paused + ContainerStatePaused ContainerState = iota +) + // Container is a single OCI container type Container struct { - id string - name string + config *containerConfig - spec *spec.Spec - pod *Pod + pod *Pod + runningSpec *spec.Spec + state ContainerState - valid bool - lock sync.RWMutex + // The location of the on-disk OCI runtime spec + specfilePath string + // Path to the nonvolatile container configuration file + statefilePath string + // Path to the container's non-volatile directory + containerDir string + // Path to the container's volatile directory + containerRunDir string + // Path to the mountpoint of the container's root filesystem + containerMountPoint string + + // Whether this container was configured with containers/storage + useContainerStorage bool + // Containers/storage information on container + // Will be empty if container is configured using a directory + containerStorageInfo *ContainerInfo + + // TODO move to storage.Locker from sync.Mutex + valid bool + lock sync.Mutex + runtime *Runtime +} + +// containerConfig contains all information that was used to create the +// container. It may not be changed once created. +// It is stored as an unchanging part of on-disk state +type containerConfig struct { + Spec *spec.Spec `json:"spec"` + ID string `json:"id"` + Name string `json:"name"` + RootfsDir *string `json:"rootfsDir,omitempty"` + RootfsImageID *string `json:"rootfsImageID,omitempty"` + RootfsImageName *string `json:"rootfsImageName,omitempty"` + UseImageConfig bool `json:"useImageConfig"` + Pod *string `json:"pod,omitempty"` + SharedNamespaceCtr *string `json:"shareNamespacesWith,omitempty"` + SharedNamespaceMap map[string]string `json:"sharedNamespaces"` } // ID returns the container's ID func (c *Container) ID() string { - // No locking needed, ID will never mutate after a container is created - return c.id + return c.config.ID } // Name returns the container's name func (c *Container) Name() string { - return c.name + return c.config.Name } // Spec returns the container's OCI runtime spec +// The spec returned is the one used to create the container. The running +// spec may differ slightly as mounts are added based on the image func (c *Container) Spec() *spec.Spec { - // The spec can potentially be altered when storage is configured and to - // add annotations at container create time - // As such, access to it is locked - c.lock.RLock() - defer c.lock.RUnlock() - spec := new(spec.Spec) - deepcopier.Copy(c.spec).To(spec) + deepcopier.Copy(c.config.Spec).To(spec) return spec } +// State returns the current state of the container +func (c *Container) State() (ContainerState, error) { + c.lock.Lock() + defer c.lock.Unlock() + + // TODO uncomment when working + // if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + // return ContainerStateUnknown, err + // } + + return c.state, nil +} + // Make a new container func newContainer(rspec *spec.Spec) (*Container, error) { if rspec == nil { @@ -54,23 +123,147 @@ func newContainer(rspec *spec.Spec) (*Container, error) { } ctr := new(Container) - ctr.id = stringid.GenerateNonCryptoID() - ctr.name = ctr.id // TODO generate unique human-readable names + ctr.config = new(containerConfig) - ctr.spec = new(spec.Spec) - deepcopier.Copy(rspec).To(ctr.spec) + ctr.config.ID = stringid.GenerateNonCryptoID() + ctr.config.Name = ctr.config.ID // TODO generate unique human-readable names + + ctr.config.Spec = new(spec.Spec) + deepcopier.Copy(rspec).To(ctr.config.Spec) return ctr, nil } +// Create container root filesystem for use +func (c *Container) setupStorage() error { + c.lock.Lock() + defer c.lock.Unlock() + + if !c.valid { + return errors.Wrapf(ErrCtrRemoved, "container %s is not valid", c.ID()) + } + + if c.state != ContainerStateConfigured { + return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Configured state to have storage set up", c.ID()) + } + + // If we're configured to use a directory, perform that setup + if c.config.RootfsDir != nil { + // TODO implement directory-based root filesystems + return ErrNotImplemented + } + + // Not using a directory, so call into containers/storage + return c.setupImageRootfs() +} + +// Set up an image as root filesystem using containers/storage +func (c *Container) setupImageRootfs() error { + // Need both an image ID and image name, plus a bool telling us whether to use the image configuration + if c.config.RootfsImageID == nil || c.config.RootfsImageName == nil { + return errors.Wrapf(ErrInvalidArg, "must provide image ID and image name to use an image") + } + + // TODO SELinux mount label + containerInfo, err := c.runtime.storageService.CreateContainerStorage(c.runtime.imageContext, *c.config.RootfsImageName, *c.config.RootfsImageID, c.config.Name, c.config.ID, "") + if err != nil { + return errors.Wrapf(err, "error creating container storage") + } + + c.useContainerStorage = true + c.containerStorageInfo = &containerInfo + c.containerDir = containerInfo.Dir + c.containerRunDir = containerInfo.RunDir + + return nil +} + // Create creates a container in the OCI runtime -func (c *Container) Create() error { - return ErrNotImplemented +func (c *Container) Create() (err error) { + c.lock.Lock() + defer c.lock.Unlock() + + if !c.valid { + return errors.Wrapf(ErrCtrRemoved, "container %s is not valid", c.ID()) + } + + if c.state != ContainerStateConfigured { + return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID()) + } + + // If using containers/storage, mount the container + if !c.useContainerStorage { + // TODO implemented directory-based root filesystems + return ErrNotImplemented + } else { + mountPoint, err := c.runtime.storageService.StartContainer(c.ID()) + if err != nil { + return errors.Wrapf(err, "error mounting storage for container %s", c.ID()) + } + c.containerMountPoint = mountPoint + + defer func() { + if err != nil { + if err2 := c.runtime.storageService.StopContainer(c.ID()); err2 != nil { + logrus.Errorf("Error unmounting storage for container %s: %v", c.ID(), err2) + } + + c.containerMountPoint = "" + } + }() + } + + // Make the OCI runtime spec we will use + c.runningSpec = new(spec.Spec) + deepcopier.Copy(c.config.Spec).To(c.runningSpec) + c.runningSpec.Root.Path = c.containerMountPoint + + // TODO Add annotation for start time to spec + + // Save the OCI spec to disk + jsonPath := filepath.Join(c.containerRunDir, "config.json") + fileJSON, err := json.Marshal(c.runningSpec) + if err != nil { + return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) + } + if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { + return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID()) + } + + // With the spec complete, do an OCI create + // TODO set cgroup parent in a sane fashion + if err := c.runtime.ociRuntime.createContainer(c, "/libpod_parent"); err != nil { + return err + } + + // TODO should flush this state to disk here + c.state = ContainerStateCreated + + return nil } // Start starts a container func (c *Container) Start() error { - return ErrNotImplemented + c.lock.Lock() + defer c.lock.Unlock() + + if !c.valid { + return ErrCtrRemoved + } + + // Container must be created or stopped to be started + if !(c.state == ContainerStateCreated || c.state == ContainerStateStopped) { + return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) + } + + if err := c.runtime.ociRuntime.startContainer(c); err != nil { + return err + } + + // TODO should flush state to disk here + c.state = ContainerStateRunning + + return nil } // Stop stops a container @@ -101,9 +294,13 @@ func (c *Container) Mount() (string, error) { return "", ErrNotImplemented } -// Status gets a container's status -// TODO this should return relevant information about container state -func (c *Container) Status() error { +// Pause pauses a container +func (c *Container) Pause() error { + return ErrNotImplemented +} + +// Unpause unpauses a container +func (c *Container) Unpause() error { return ErrNotImplemented } diff --git a/libpod/errors.go b/libpod/errors.go index 31eddc46..24544500 100644 --- a/libpod/errors.go +++ b/libpod/errors.go @@ -20,6 +20,10 @@ var ( // ErrImageExists indicated an image with the same ID already exists ErrImageExists = errors.New("image already exists") + // ErrCtrStateInvalid indicates a container is in an improper state for + // the requested operation + ErrCtrStateInvalid = errors.New("container state improper") + // ErrRuntimeFinalized indicates that the runtime has already been // created and cannot be modified ErrRuntimeFinalized = errors.New("runtime has been finalized") diff --git a/libpod/oci.go b/libpod/oci.go new file mode 100644 index 00000000..672f0582 --- /dev/null +++ b/libpod/oci.go @@ -0,0 +1,240 @@ +package libpod + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "syscall" + "time" + + "github.com/containerd/cgroups" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + // TODO import these functions into libpod and remove the import + // Trying to keep libpod from depending on CRI-O code + "github.com/kubernetes-incubator/cri-o/utils" +) + +// OCI code is undergoing heavy rewrite + +const ( + // CgroupfsCgroupsManager represents cgroupfs native cgroup manager + CgroupfsCgroupsManager = "cgroupfs" + // SystemdCgroupsManager represents systemd native cgroup manager + SystemdCgroupsManager = "systemd" + + // ContainerCreateTimeout represents the value of container creating timeout + ContainerCreateTimeout = 240 * time.Second +) + +// OCIRuntime represents an OCI-compatible runtime that libpod can call into +// to perform container operations +type OCIRuntime struct { + name string + path string + conmonPath string + conmonEnv []string + cgroupManager string + exitsDir string + logSizeMax int64 + noPivot bool +} + +// syncInfo is used to return data from monitor process to daemon +type syncInfo struct { + Pid int `json:"pid"` + Message string `json:"message,omitempty"` +} + +// Make a new OCI runtime with provided options +func newOCIRuntime(name string, path string, conmonPath string, conmonEnv []string, cgroupManager string, exitsDir string, logSizeMax int64, noPivotRoot bool) (*OCIRuntime, error) { + runtime := new(OCIRuntime) + runtime.name = name + runtime.path = path + runtime.conmonPath = conmonPath + runtime.conmonEnv = conmonEnv + runtime.cgroupManager = cgroupManager + runtime.exitsDir = exitsDir + runtime.logSizeMax = logSizeMax + runtime.noPivot = noPivotRoot + + if cgroupManager != CgroupfsCgroupsManager && cgroupManager != SystemdCgroupsManager { + return nil, errors.Wrapf(ErrInvalidArg, "invalid cgroup manager specified: %s", cgroupManager) + } + + return runtime, nil +} + +// newPipe creates a unix socket pair for communication +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// Create systemd unit name for cgroup scopes +func createUnitName(prefix string, name string) string { + return fmt.Sprintf("%s-%s.scope", prefix, name) +} + +// CreateContainer creates a container in the OCI runtime +// TODO terminal support for container +// Presently just ignoring conmon opts related to it +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error { + parentPipe, childPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair") + } + + childStartPipe, parentStartPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair") + } + + defer parentPipe.Close() + defer parentStartPipe.Close() + + args := []string{} + if r.cgroupManager == SystemdCgroupsManager { + args = append(args, "-s") + } + args = append(args, "-c", ctr.ID()) + args = append(args, "-u", ctr.ID()) + args = append(args, "-r", r.path) + args = append(args, "-b", ctr.containerRunDir) + args = append(args, "-p", filepath.Join(ctr.containerRunDir, "pidfile")) + // TODO container log location should be configurable + // The default also likely shouldn't be this + args = append(args, "-l", filepath.Join(ctr.containerDir, "ctr.log")) + args = append(args, "--exit-dir", r.exitsDir) + if r.logSizeMax >= 0 { + args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax)) + } + if r.noPivot { + args = append(args, "--no-pivot") + } + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) + + cmd := exec.Command(r.conmonPath, args...) + cmd.Dir = ctr.containerRunDir + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + // TODO this is probably a really bad idea for some uses + // Make this configurable + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe) + // 0, 1 and 2 are stdin, stdout and stderr + cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3)) + cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4)) + + err = cmd.Start() + if err != nil { + childPipe.Close() + return err + } + + // We don't need childPipe on the parent side + childPipe.Close() + childStartPipe.Close() + + // Move conmon to specified cgroup + if r.cgroupManager == SystemdCgroupsManager { + logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("libpod-conmon", ctr.ID())) + if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("libpod-conmon", ctr.ID())); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/libpod-conmon-"+ctr.ID())), &spec.LinuxResources{}) + if err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // XXX: this defer does nothing as the cgroup can't be deleted cause + // it contains the conmon pid in tasks + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + defer control.Delete() + if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + + /* We set the cgroup, now the child can start creating children */ + someData := []byte{0} + _, err = parentStartPipe.Write(someData) + if err != nil { + return err + } + + /* Wait for initial setup and fork, and reap child */ + err = cmd.Wait() + if err != nil { + return err + } + + // TODO should do a defer r.deleteContainer(ctr) here if err != nil + // Need deleteContainer to be working first, though... + + // Wait to get container pid from conmon + type syncStruct struct { + si *syncInfo + err error + } + ch := make(chan syncStruct) + go func() { + var si *syncInfo + if err = json.NewDecoder(parentPipe).Decode(&si); err != nil { + ch <- syncStruct{err: err} + return + } + ch <- syncStruct{si: si} + }() + + select { + case ss := <-ch: + if ss.err != nil { + return errors.Wrapf(ss.err, "error reading container (probably exited) json message") + } + logrus.Debugf("Received container pid: %d", ss.si.Pid) + if ss.si.Pid == -1 { + if ss.si.Message != "" { + return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message) + } + return errors.Wrapf(ErrInternal, "container create failed") + } + case <-time.After(ContainerCreateTimeout): + return errors.Wrapf(ErrInternal, "container creation timeout") + } + return nil +} + +// updateContainerStatus retrieves the current status of the container from the +// runtime +func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { + return ErrNotImplemented +} + +// startContainer starts the given container +func (r *OCIRuntime) startContainer(ctr *Container) error { + // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers? + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", ctr.ID()); err != nil { + return err + } + + // TODO record start time in container struct + + return nil +} diff --git a/libpod/options.go b/libpod/options.go index f2f51b5c..d8270480 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -137,6 +137,7 @@ func WithConmonEnv(environment []string) RuntimeOption { // WithCgroupManager specifies the manager implementation name which is used to // handle cgroups for containers +// Current valid values are "cgroupfs" and "systemd" func WithCgroupManager(manager string) RuntimeOption { return func(rt *Runtime) error { if rt.valid { @@ -149,6 +150,20 @@ func WithCgroupManager(manager string) RuntimeOption { } } +// WithExitsDir sets the directory that container exit files (containing exit +// codes) will be created by conmon +func WithExitsDir(dir string) RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return ErrRuntimeFinalized + } + + rt.config.ExitsDir = dir + + return nil + } +} + // WithSELinux enables SELinux on the container server func WithSELinux() RuntimeOption { return func(rt *Runtime) error { @@ -176,19 +191,74 @@ func WithPidsLimit(limit int64) RuntimeOption { } } +// WithMaxLogSize sets the maximum size of container logs +// Positive sizes are limits in bytes, -1 is unlimited +func WithMaxLogSize(limit int64) RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return ErrRuntimeFinalized + } + + rt.config.MaxLogSize = limit + + return nil + } +} + +// WithNoPivotRoot sets the runtime to use MS_MOVE instead of PIVOT_ROOT when +// starting containers +func WithNoPivotRoot(noPivot bool) RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return ErrRuntimeFinalized + } + + rt.config.NoPivotRoot = true + + return nil + } +} + // Container Creation Options // WithRootFSFromPath uses the given path as a container's root filesystem // No further setup is performed on this path func WithRootFSFromPath(path string) CtrCreateOption { - return ctrNotImplemented + return func(ctr *Container) error { + if ctr.valid { + return ErrCtrFinalized + } + + if ctr.config.RootfsDir != nil || ctr.config.RootfsImageID != nil || ctr.config.RootfsImageName != nil { + return fmt.Errorf("container already configured to with rootfs") + } + + ctr.config.RootfsDir = &path + + return nil + } } // WithRootFSFromImage sets up a fresh root filesystem using the given image // If useImageConfig is specified, image volumes, environment variables, and // other configuration from the image will be added to the config -func WithRootFSFromImage(image string, useImageConfig bool) CtrCreateOption { - return ctrNotImplemented +// TODO: Replace image name and ID with a libpod.Image struct when that is finished +func WithRootFSFromImage(imageID string, imageName string, useImageConfig bool) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return ErrCtrFinalized + } + + if ctr.config.RootfsDir != nil || ctr.config.RootfsImageID != nil || ctr.config.RootfsImageName != nil { + return fmt.Errorf("container already configured to with rootfs") + } + + ctr.config.RootfsImageID = &imageID + ctr.config.RootfsImageName = &imageName + ctr.config.UseImageConfig = useImageConfig + + return nil + } } // WithSharedNamespaces sets a container to share namespaces with another @@ -203,7 +273,7 @@ func WithSharedNamespaces(from *Container, namespaces map[string]string) CtrCrea // WithPod adds the container to a pod func (r *Runtime) WithPod(pod *Pod) CtrCreateOption { return func(ctr *Container) error { - if !ctr.valid { + if ctr.valid { return ErrCtrFinalized } @@ -241,11 +311,11 @@ func WithAnnotations(annotations map[string]string) CtrCreateOption { // WithName sets the container's name func WithName(name string) CtrCreateOption { return func(ctr *Container) error { - if !ctr.valid { + if ctr.valid { return ErrCtrFinalized } - ctr.name = name + ctr.config.Name = name return nil } diff --git a/libpod/pod.go b/libpod/pod.go index b7fcab4b..46b78909 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -50,11 +50,11 @@ func (p *Pod) addContainer(ctr *Container) error { return ErrPodRemoved } - if _, ok := p.containers[ctr.id]; ok { - return errors.Wrapf(ErrCtrExists, "container with ID %s already exists in pod %s", ctr.id, p.id) + if _, ok := p.containers[ctr.ID()]; ok { + return errors.Wrapf(ErrCtrExists, "container with ID %s already exists in pod %s", ctr.ID(), p.id) } - p.containers[ctr.id] = ctr + p.containers[ctr.ID()] = ctr return nil } @@ -69,11 +69,11 @@ func (p *Pod) removeContainer(ctr *Container) error { return ErrPodRemoved } - if _, ok := p.containers[ctr.id]; !ok { - return errors.Wrapf(ErrNoSuchCtr, "no container with id %s in pod %s", ctr.id, p.id) + if _, ok := p.containers[ctr.ID()]; !ok { + return errors.Wrapf(ErrNoSuchCtr, "no container with id %s in pod %s", ctr.ID(), p.id) } - delete(p.containers, ctr.id) + delete(p.containers, ctr.ID()) return nil } diff --git a/libpod/runtime.go b/libpod/runtime.go index 47dd9d19..94266a8a 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1,13 +1,12 @@ package libpod import ( + "os" "sync" is "github.com/containers/image/storage" "github.com/containers/image/types" "github.com/containers/storage" - "github.com/kubernetes-incubator/cri-o/server/apparmor" - "github.com/kubernetes-incubator/cri-o/server/seccomp" "github.com/pkg/errors" "github.com/ulule/deepcopier" ) @@ -18,14 +17,14 @@ type RuntimeOption func(*Runtime) error // Runtime is the core libpod runtime type Runtime struct { - config *RuntimeConfig - state State - store storage.Store - imageContext *types.SystemContext - apparmorEnabled bool - seccompEnabled bool - valid bool - lock sync.RWMutex + config *RuntimeConfig + state State + store storage.Store + storageService *storageService + imageContext *types.SystemContext + ociRuntime *OCIRuntime + valid bool + lock sync.RWMutex } // RuntimeConfig contains configuration options used to set up the runtime @@ -39,8 +38,11 @@ type RuntimeConfig struct { ConmonPath string ConmonEnvVars []string CgroupManager string + ExitsDir string SelinuxEnabled bool PidsLimit int64 + MaxLogSize int64 + NoPivotRoot bool } var ( @@ -54,8 +56,11 @@ var ( "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, CgroupManager: "cgroupfs", + ExitsDir: "/var/run/libpod/exits", SelinuxEnabled: false, PidsLimit: 1024, + MaxLogSize: -1, + NoPivotRoot: false, } ) @@ -83,6 +88,14 @@ func NewRuntime(options ...RuntimeOption) (*Runtime, error) { runtime.store = store is.Transport.SetStore(store) + // TODO remove StorageImageServer and make its functions work directly + // on Runtime (or convert to something that satisfies an image) + storageService, err := getStorageService(runtime.store) + if err != nil { + return nil, err + } + runtime.storageService = storageService + // Set up containers/image runtime.imageContext = &types.SystemContext{ SignaturePolicyPath: runtime.config.SignaturePolicyPath, @@ -95,8 +108,24 @@ func NewRuntime(options ...RuntimeOption) (*Runtime, error) { } runtime.state = state - runtime.seccompEnabled = seccomp.IsEnabled() - runtime.apparmorEnabled = apparmor.IsEnabled() + // Make an OCI runtime to perform container operations + ociRuntime, err := newOCIRuntime("runc", runtime.config.RuntimePath, + runtime.config.ConmonPath, runtime.config.ConmonEnvVars, + runtime.config.CgroupManager, runtime.config.ExitsDir, + runtime.config.MaxLogSize, runtime.config.NoPivotRoot) + if err != nil { + return nil, err + } + runtime.ociRuntime = ociRuntime + + // Make the directory that will hold container exit files + if err := os.MkdirAll(runtime.config.ExitsDir, 0755); err != nil { + // The directory is allowed to exist + if !os.IsExist(err) { + return nil, errors.Wrapf(err, "error creating container exit files directory %s", + runtime.config.ExitsDir) + } + } // Mark the runtime as valid - ready to be used, cannot be modified // further diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index fc788227..68b7880d 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -38,6 +38,13 @@ func (r *Runtime) NewContainer(spec *spec.Spec, options ...CtrCreateOption) (*Co } ctr.valid = true + ctr.state = ContainerStateConfigured + ctr.runtime = r + + if err := ctr.setupStorage(); err != nil { + return nil, errors.Wrapf(err, "error configuring storage for container") + } + // TODO: once teardownStorage is implemented, do a defer here that tears down storage is AddContainer fails if err := r.state.AddContainer(ctr); err != nil { // If we joined a pod, remove ourself from it @@ -77,10 +84,16 @@ func (r *Runtime) RemoveContainer(c *Container, force bool) error { // TODO check container status and unmount storage // TODO check that no other containers depend on this container's // namespaces - if err := c.Status(); err != nil { + status, err := c.State() + if err != nil { return err } + // A container cannot be removed if it is running + if status == ContainerStateRunning { + return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is running", c.ID()) + } + if err := r.state.RemoveContainer(c); err != nil { return errors.Wrapf(err, "error removing container from state") } @@ -185,6 +198,7 @@ func (r *Runtime) getContainersWithImage(imageID string) ([]storage.Container, e } // removeMultipleContainers deletes a list of containers from the store +// TODO refactor this to remove libpod Containers func (r *Runtime) removeMultipleContainers(containers []storage.Container) error { for _, ctr := range containers { if err := r.store.DeleteContainer(ctr.ID); err != nil { @@ -193,3 +207,8 @@ func (r *Runtime) removeMultipleContainers(containers []storage.Container) error } return nil } + +// ContainerConfigToDisk saves a container's nonvolatile configuration to disk +func (r *Runtime) containerConfigToDisk(ctr *Container) error { + return ErrNotImplemented +} diff --git a/libpod/storage.go b/libpod/storage.go new file mode 100644 index 00000000..678b73b3 --- /dev/null +++ b/libpod/storage.go @@ -0,0 +1,263 @@ +package libpod + +import ( + "encoding/json" + "time" + + istorage "github.com/containers/image/storage" + "github.com/containers/image/types" + "github.com/containers/storage" + "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type storageService struct { + store storage.Store +} + +// getStorageService returns a storageService which can create container root +// filesystems from images +func getStorageService(store storage.Store) (*storageService, error) { + return &storageService{store: store}, nil +} + +// ContainerInfo wraps a subset of information about a container: its ID and +// the locations of its nonvolatile and volatile per-container directories, +// along with a copy of the configuration blob from the image that was used to +// create the container, if the image had a configuration. +type ContainerInfo struct { + ID string + Dir string + RunDir string + Config *v1.Image +} + +// RuntimeContainerMetadata is the structure that we encode as JSON and store +// in the metadata field of storage.Container objects. It is used for +// specifying attributes of pod sandboxes and containers when they are being +// created, and allows a container's MountLabel, and possibly other values, to +// be modified in one read/write cycle via calls to +// RuntimeServer.ContainerMetadata, RuntimeContainerMetadata.SetMountLabel, +// and RuntimeServer.SetContainerMetadata. +type RuntimeContainerMetadata struct { + // The provided name and the ID of the image that was used to + // instantiate the container. + ImageName string `json:"image-name"` // Applicable to both PodSandboxes and Containers + ImageID string `json:"image-id"` // Applicable to both PodSandboxes and Containers + // The container's name, which for an infrastructure container is usually PodName + "-infra". + ContainerName string `json:"name"` // Applicable to both PodSandboxes and Containers, mandatory + CreatedAt int64 `json:"created-at"` // Applicable to both PodSandboxes and Containers + MountLabel string `json:"mountlabel,omitempty"` // Applicable to both PodSandboxes and Containers +} + +// SetMountLabel updates the mount label held by a RuntimeContainerMetadata +// object. +func (metadata *RuntimeContainerMetadata) SetMountLabel(mountLabel string) { + metadata.MountLabel = mountLabel +} + +// CreateContainerStorage creates the storage end of things. We already have the container spec created +// TO-DO We should be passing in an KpodImage object in the future. +func (r *storageService) CreateContainerStorage(systemContext *types.SystemContext, imageName, imageID, containerName, containerID, mountLabel string) (ContainerInfo, error) { + var ref types.ImageReference + if imageName == "" && imageID == "" { + return ContainerInfo{}, ErrEmptyID + } + if containerName == "" { + return ContainerInfo{}, ErrEmptyID + } + //// Check if we have the specified image. + ref, err := istorage.Transport.ParseStoreReference(r.store, imageName) + if err != nil { + return ContainerInfo{}, err + } + img, err := istorage.Transport.GetStoreImage(r.store, ref) + if err != nil { + return ContainerInfo{}, err + } + // Pull out a copy of the image's configuration. + image, err := ref.NewImage(systemContext) + if err != nil { + return ContainerInfo{}, err + } + defer image.Close() + + imageConfig, err := image.OCIConfig() + if err != nil { + return ContainerInfo{}, err + } + + // Update the image name and ID. + if imageName == "" && len(img.Names) > 0 { + imageName = img.Names[0] + } + imageID = img.ID + + // Build metadata to store with the container. + metadata := RuntimeContainerMetadata{ + ImageName: imageName, + ImageID: imageID, + ContainerName: containerName, + CreatedAt: time.Now().Unix(), + MountLabel: mountLabel, + } + mdata, err := json.Marshal(&metadata) + if err != nil { + return ContainerInfo{}, err + } + + // Build the container. + names := []string{containerName} + + container, err := r.store.CreateContainer(containerID, names, img.ID, "", string(mdata), nil) + if err != nil { + logrus.Debugf("failed to create container %s(%s): %v", metadata.ContainerName, containerID, err) + + return ContainerInfo{}, err + } + logrus.Debugf("created container %q", container.ID) + + // If anything fails after this point, we need to delete the incomplete + // container before returning. + defer func() { + if err != nil { + if err2 := r.store.DeleteContainer(container.ID); err2 != nil { + logrus.Infof("%v deleting partially-created container %q", err2, container.ID) + + return + } + logrus.Infof("deleted partially-created container %q", container.ID) + } + }() + + // Add a name to the container's layer so that it's easier to follow + // what's going on if we're just looking at the storage-eye view of things. + layerName := metadata.ContainerName + "-layer" + names, err = r.store.Names(container.LayerID) + if err != nil { + return ContainerInfo{}, err + } + names = append(names, layerName) + err = r.store.SetNames(container.LayerID, names) + if err != nil { + return ContainerInfo{}, err + } + + // Find out where the container work directories are, so that we can return them. + containerDir, err := r.store.ContainerDirectory(container.ID) + if err != nil { + return ContainerInfo{}, err + } + logrus.Debugf("container %q has work directory %q", container.ID, containerDir) + + containerRunDir, err := r.store.ContainerRunDirectory(container.ID) + if err != nil { + return ContainerInfo{}, err + } + logrus.Debugf("container %q has run directory %q", container.ID, containerRunDir) + + return ContainerInfo{ + ID: container.ID, // not needed + Dir: containerDir, + RunDir: containerRunDir, + Config: imageConfig, + }, nil +} + +func (r *storageService) DeleteContainer(idOrName string) error { + if idOrName == "" { + return ErrEmptyID + } + container, err := r.store.Container(idOrName) + if err != nil { + return err + } + err = r.store.DeleteContainer(container.ID) + if err != nil { + logrus.Debugf("failed to delete container %q: %v", container.ID, err) + return err + } + return nil +} + +func (r *storageService) SetContainerMetadata(idOrName string, metadata RuntimeContainerMetadata) error { + mdata, err := json.Marshal(&metadata) + if err != nil { + logrus.Debugf("failed to encode metadata for %q: %v", idOrName, err) + return err + } + return r.store.SetMetadata(idOrName, string(mdata)) +} + +func (r *storageService) GetContainerMetadata(idOrName string) (RuntimeContainerMetadata, error) { + metadata := RuntimeContainerMetadata{} + mdata, err := r.store.Metadata(idOrName) + if err != nil { + return metadata, err + } + if err = json.Unmarshal([]byte(mdata), &metadata); err != nil { + return metadata, err + } + return metadata, nil +} + +func (r *storageService) StartContainer(idOrName string) (string, error) { + container, err := r.store.Container(idOrName) + if err != nil { + if errors.Cause(err) == storage.ErrContainerUnknown { + return "", ErrNoSuchCtr + } + return "", err + } + metadata := RuntimeContainerMetadata{} + if err = json.Unmarshal([]byte(container.Metadata), &metadata); err != nil { + return "", err + } + mountPoint, err := r.store.Mount(container.ID, metadata.MountLabel) + if err != nil { + logrus.Debugf("failed to mount container %q: %v", container.ID, err) + return "", err + } + logrus.Debugf("mounted container %q at %q", container.ID, mountPoint) + return mountPoint, nil +} + +func (r *storageService) StopContainer(idOrName string) error { + if idOrName == "" { + return ErrEmptyID + } + container, err := r.store.Container(idOrName) + if err != nil { + return err + } + err = r.store.Unmount(container.ID) + if err != nil { + logrus.Debugf("failed to unmount container %q: %v", container.ID, err) + return err + } + logrus.Debugf("unmounted container %q", container.ID) + return nil +} + +func (r *storageService) GetWorkDir(id string) (string, error) { + container, err := r.store.Container(id) + if err != nil { + if errors.Cause(err) == storage.ErrContainerUnknown { + return "", ErrNoSuchCtr + } + return "", err + } + return r.store.ContainerDirectory(container.ID) +} + +func (r *storageService) GetRunDir(id string) (string, error) { + container, err := r.store.Container(id) + if err != nil { + if errors.Cause(err) == storage.ErrContainerUnknown { + return "", ErrNoSuchCtr + } + return "", err + } + return r.store.ContainerRunDirectory(container.ID) +}