diff --git a/oci/oci.go b/oci/oci.go index df5af077..d4f585c5 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -606,6 +606,16 @@ func (c *Container) Sandbox() string { return c.sandbox } +// Terminal returns whether the container has a TTY attached +func (c *Container) Terminal() bool { + return c.terminal +} + +// Privileged returns whether the container is privileged +func (c *Container) Privileged() bool { + return c.privileged +} + // NetNsPath returns the path to the network namespace of the container. func (c *Container) NetNsPath() (string, error) { if c.state == nil { diff --git a/server/container.go b/server/container.go index 56fb7e73..304e7169 100644 --- a/server/container.go +++ b/server/container.go @@ -18,14 +18,10 @@ func (s *Server) getContainerFromRequest(containerID string) (*oci.Container, er return nil, fmt.Errorf("container ID should not be empty") } - containerID, err := s.ctrIDIndex.Get(containerID) + c, err := s.state.LookupContainerByID(containerID) if err != nil { - return nil, fmt.Errorf("container with ID starting with %s not found: %v", containerID, err) + return nil, fmt.Errorf("container with ID starting with %v could not be retrieved: %v", containerID, err) } - c := s.state.containers.Get(containerID) - if c == nil { - return nil, fmt.Errorf("specified container not found: %s", containerID) - } return c, nil } diff --git a/server/container_create.go b/server/container_create.go index fdfe1ba2..46843033 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker/pkg/symlink" "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/sandbox" "github.com/kubernetes-incubator/cri-o/server/seccomp" "github.com/opencontainers/image-spec/specs-go/v1" "github.com/opencontainers/runc/libcontainer/user" @@ -31,7 +32,7 @@ const ( seccompLocalhostPrefix = "localhost/" ) -func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { +func addOciBindMounts(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { mounts := containerConfig.GetMounts() for _, mount := range mounts { dest := mount.ContainerPath @@ -51,7 +52,7 @@ func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen if mount.SelinuxRelabel { // Need a way in kubernetes to determine if the volume is shared or private - if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { + if err := label.Relabel(src, sb.MountLabel(), true); err != nil && err != syscall.ENOTSUP { return fmt.Errorf("relabel failed %s: %v", src, err) } } @@ -201,24 +202,15 @@ func ensureSaneLogPath(logPath string) error { // CreateContainer creates a new container in specified PodSandbox func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { logrus.Debugf("CreateContainerRequest %+v", req) - s.Update() - - s.updateLock.RLock() - defer s.updateLock.RUnlock() sbID := req.PodSandboxId if sbID == "" { return nil, fmt.Errorf("PodSandboxId should not be empty") } - sandboxID, err := s.podIDIndex.Get(sbID) + sb, err := s.state.LookupSandboxByID(sbID) if err != nil { - return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err) - } - - sb := s.getSandbox(sandboxID) - if sb == nil { - return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID) + return nil, fmt.Errorf("error retrieving PodSandbox with ID starting with %s: %v", sbID, err) } // The config of the container @@ -233,17 +225,11 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq } attempt := containerConfig.GetMetadata().Attempt - containerID, containerName, err := s.generateContainerIDandName(sb.name, name, attempt) + containerID, containerName, err := s.generateContainerIDandName(sb.Name(), name, attempt) if err != nil { return nil, err } - defer func() { - if err != nil { - s.releaseContainerName(containerName) - } - }() - container, err := s.createSandboxContainer(ctx, containerID, containerName, sb, req.GetSandboxConfig(), containerConfig) if err != nil { return nil, err @@ -257,7 +243,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq } }() - if err = s.runtime.CreateContainer(container, sb.cgroupParent); err != nil { + if err = s.runtime.CreateContainer(container, sb.CgroupParent()); err != nil { return nil, err } @@ -265,10 +251,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq return nil, err } - s.addContainer(container) - - if err = s.ctrIDIndex.Add(containerID); err != nil { - s.removeContainer(container) + if err := s.addContainer(container); err != nil { return nil, err } @@ -280,7 +263,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq return resp, nil } -func (s *Server) createSandboxContainer(ctx context.Context, containerID string, containerName string, sb *sandbox, SandboxConfig *pb.PodSandboxConfig, containerConfig *pb.ContainerConfig) (*oci.Container, error) { +func (s *Server) createSandboxContainer(ctx context.Context, containerID string, containerName string, sb *sandbox.Sandbox, SandboxConfig *pb.PodSandboxConfig, containerConfig *pb.ContainerConfig) (*oci.Container, error) { if sb == nil { return nil, errors.New("createSandboxContainer needs a sandbox") } @@ -308,7 +291,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, // set this container's apparmor profile if it is set by sandbox if s.appArmorEnabled { - appArmorProfileName := s.getAppArmorProfileName(sb.annotations, metadata.Name) + appArmorProfileName := s.getAppArmorProfileName(sb.Annotations(), metadata.Name) if appArmorProfileName != "" { // reload default apparmor profile if it is unloaded. if s.appArmorProfile == apparmor.DefaultApparmorProfile { @@ -333,12 +316,12 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, logPath := containerConfig.LogPath if logPath == "" { // TODO: Should we use sandboxConfig.GetLogDirectory() here? - logPath = filepath.Join(sb.logDir, containerID+".log") + logPath = filepath.Join(sb.LogDir(), containerID+".log") } if !filepath.IsAbs(logPath) { // XXX: It's not really clear what this should be versus the sbox logDirectory. logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath) - logPath = filepath.Join(sb.logDir, logPath) + logPath = filepath.Join(sb.LogDir(), logPath) } // Handle https://issues.k8s.io/44043 @@ -347,7 +330,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } logrus.WithFields(logrus.Fields{ - "sbox.logdir": sb.logDir, + "sbox.logdir": sb.LogDir(), "ctr.logfile": containerConfig.LogPath, "log_path": logPath, }).Debugf("setting container's log_path") @@ -382,12 +365,12 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, specgen.SetLinuxResourcesOOMScoreAdj(int(oomScoreAdj)) } - if sb.cgroupParent != "" { + if sb.CgroupParent() != "" { if s.config.CgroupManager == "systemd" { - cgPath := sb.cgroupParent + ":" + "ocid" + ":" + containerID + cgPath := sb.CgroupParent() + ":" + "ocid" + ":" + containerID specgen.SetLinuxCgroupsPath(cgPath) } else { - specgen.SetLinuxCgroupsPath(sb.cgroupParent + "/" + containerID) + specgen.SetLinuxCgroupsPath(sb.CgroupParent() + "/" + containerID) } } @@ -412,12 +395,12 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } } - specgen.SetProcessSelinuxLabel(sb.processLabel) - specgen.SetLinuxMountLabel(sb.mountLabel) + specgen.SetProcessSelinuxLabel(sb.ProcessLabel()) + specgen.SetLinuxMountLabel(sb.MountLabel()) } // Join the namespace paths for the pod sandbox container. - podInfraState := s.runtime.ContainerStatus(sb.infraContainer) + podInfraState := s.runtime.ContainerStatus(sb.InfraContainer()) logrus.Debugf("pod container state %+v", podInfraState) @@ -426,7 +409,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - netNsPath := sb.netNsPath() + netNsPath := sb.NetNsPath() if netNsPath == "" { // The sandbox does not have a permanent namespace, // it's on the host one. @@ -448,20 +431,20 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } // bind mount the pod shm - specgen.AddBindMount(sb.shmPath, "/dev/shm", []string{"rw"}) + specgen.AddBindMount(sb.ShmPath(), "/dev/shm", []string{"rw"}) - if sb.resolvPath != "" { + if sb.ResolvPath() != "" { // bind mount the pod resolver file - specgen.AddBindMount(sb.resolvPath, "/etc/resolv.conf", []string{"ro"}) + specgen.AddBindMount(sb.ResolvPath(), "/etc/resolv.conf", []string{"ro"}) } - if sb.hostname != "" { - specgen.SetHostname(sb.hostname) + if sb.Hostname() != "" { + specgen.SetHostname(sb.Hostname()) } specgen.AddAnnotation("ocid/name", containerName) - specgen.AddAnnotation("ocid/sandbox_id", sb.id) - specgen.AddAnnotation("ocid/sandbox_name", sb.infraContainer.Name()) + specgen.AddAnnotation("ocid/sandbox_id", sb.ID()) + specgen.AddAnnotation("ocid/sandbox_name", sb.InfraContainer().Name()) specgen.AddAnnotation("ocid/container_type", containerTypeContainer) specgen.AddAnnotation("ocid/log_path", logPath) specgen.AddAnnotation("ocid/tty", fmt.Sprintf("%v", containerConfig.Tty)) @@ -485,19 +468,19 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } specgen.AddAnnotation("ocid/annotations", string(annotationsJSON)) - if err = s.setupSeccomp(&specgen, containerName, sb.annotations); err != nil { + if err = s.setupSeccomp(&specgen, containerName, sb.Annotations()); err != nil { return nil, err } metaname := metadata.Name attempt := metadata.Attempt containerInfo, err := s.storage.CreateContainer(s.imageContext, - sb.name, sb.id, + sb.Name(), sb.ID(), image, image, containerName, containerID, metaname, attempt, - sb.mountLabel, + sb.MountLabel(), nil) if err != nil { return nil, err @@ -575,7 +558,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, annotations, imageSpec, metadata, sb.ID(), containerConfig.Tty, sb.Privileged()) if err != nil { return nil, err } @@ -627,10 +610,7 @@ func (s *Server) generateContainerIDandName(podName string, name string, attempt if name == "infra" { nameStr = fmt.Sprintf("%s-%s", podName, name) } - if name, err = s.reserveContainerName(id, nameStr); err != nil { - return "", "", err - } - return id, name, err + return id, nameStr, err } // getAppArmorProfileName gets the profile name for the given container. diff --git a/server/container_list.go b/server/container_list.go index ce6171db..2d1577ea 100644 --- a/server/container_list.go +++ b/server/container_list.go @@ -29,38 +29,35 @@ func filterContainer(c *pb.Container, filter *pb.ContainerFilter) bool { // ListContainers lists all containers by filters. func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (*pb.ListContainersResponse, error) { logrus.Debugf("ListContainersRequest %+v", req) - s.Update() var ctrs []*pb.Container filter := req.Filter - ctrList := s.state.containers.List() + ctrList, _ := s.state.GetAllContainers() // Filter using container id and pod id first. if filter != nil { if filter.Id != "" { - id, err := s.ctrIDIndex.Get(filter.Id) + c, err := s.state.LookupContainerByID(filter.Id) if err != nil { return nil, err } - c := s.state.containers.Get(id) - if c != nil { - if filter.PodSandboxId != "" { - if c.Sandbox() == filter.PodSandboxId { - ctrList = []*oci.Container{c} - } else { - ctrList = []*oci.Container{} - } - - } else { + if filter.PodSandboxId != "" { + if c.Sandbox() == filter.PodSandboxId { ctrList = []*oci.Container{c} + } else { + ctrList = []*oci.Container{} } + + } else { + ctrList = []*oci.Container{c} } } else { if filter.PodSandboxId != "" { - pod := s.state.sandboxes[filter.PodSandboxId] - if pod == nil { + pod, err := s.state.GetSandbox(filter.PodSandboxId) + // TODO check if this is a pod not found error, if not we should error out here + if err != nil { ctrList = []*oci.Container{} } else { - ctrList = pod.containers.List() + ctrList = pod.Containers() } } } diff --git a/server/container_remove.go b/server/container_remove.go index d8f21ca8..0eeff343 100644 --- a/server/container_remove.go +++ b/server/container_remove.go @@ -13,7 +13,6 @@ import ( // should be force removed. func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) { logrus.Debugf("RemoveContainerRequest %+v", req) - s.Update() c, err := s.getContainerFromRequest(req.ContainerId) if err != nil { return nil, err @@ -34,7 +33,9 @@ func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerReq return nil, fmt.Errorf("failed to delete container %s: %v", c.ID(), err) } - s.removeContainer(c) + if err := s.removeContainer(c); err != nil { + return nil, fmt.Errorf("failed to remove container %s: %v", c.ID(), err) + } if err := s.storage.StopContainer(c.ID()); err != nil { return nil, fmt.Errorf("failed to unmount container %s: %v", c.ID(), err) @@ -44,12 +45,6 @@ func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerReq return nil, fmt.Errorf("failed to delete storage for container %s: %v", c.ID(), err) } - s.releaseContainerName(c.Name()) - - if err := s.ctrIDIndex.Delete(c.ID()); err != nil { - return nil, err - } - resp := &pb.RemoveContainerResponse{} logrus.Debugf("RemoveContainerResponse: %+v", resp) return resp, nil diff --git a/server/container_start.go b/server/container_start.go index 13b6bfe2..128cc5fc 100644 --- a/server/container_start.go +++ b/server/container_start.go @@ -11,7 +11,6 @@ import ( // StartContainer starts the container. func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (*pb.StartContainerResponse, error) { logrus.Debugf("StartContainerRequest %+v", req) - s.Update() c, err := s.getContainerFromRequest(req.ContainerId) if err != nil { return nil, err diff --git a/server/container_status.go b/server/container_status.go index 138a78e5..47a13a3d 100644 --- a/server/container_status.go +++ b/server/container_status.go @@ -13,7 +13,6 @@ import ( // ContainerStatus returns status of the container. func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error) { logrus.Debugf("ContainerStatusRequest %+v", req) - s.Update() c, err := s.getContainerFromRequest(req.ContainerId) if err != nil { return nil, err diff --git a/server/container_stop.go b/server/container_stop.go index aed5a56c..58865edf 100644 --- a/server/container_stop.go +++ b/server/container_stop.go @@ -12,7 +12,6 @@ import ( // StopContainer stops a running container with a grace period (i.e., timeout). func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) { logrus.Debugf("StopContainerRequest %+v", req) - s.Update() c, err := s.getContainerFromRequest(req.ContainerId) if err != nil { return nil, err diff --git a/server/sandbox.go b/server/sandbox.go deleted file mode 100644 index 1832265c..00000000 --- a/server/sandbox.go +++ /dev/null @@ -1,281 +0,0 @@ -package server - -import ( - "crypto/rand" - "errors" - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/Sirupsen/logrus" - "github.com/containernetworking/cni/pkg/ns" - "github.com/docker/docker/pkg/stringid" - "github.com/kubernetes-incubator/cri-o/oci" - "golang.org/x/sys/unix" - "k8s.io/apimachinery/pkg/fields" - pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" -) - -type sandboxNetNs struct { - sync.Mutex - ns ns.NetNS - symlink *os.File - closed bool - restored bool -} - -func (ns *sandboxNetNs) symlinkCreate(name string) error { - b := make([]byte, 4) - _, randErr := rand.Reader.Read(b) - if randErr != nil { - return randErr - } - - nsName := fmt.Sprintf("%s-%x", name, b) - symlinkPath := filepath.Join(nsRunDir, nsName) - - if err := os.Symlink(ns.ns.Path(), symlinkPath); err != nil { - return err - } - - fd, err := os.Open(symlinkPath) - if err != nil { - if removeErr := os.RemoveAll(symlinkPath); removeErr != nil { - return removeErr - } - - return err - } - - ns.symlink = fd - - return nil -} - -func (ns *sandboxNetNs) symlinkRemove() error { - if err := ns.symlink.Close(); err != nil { - return err - } - - return os.RemoveAll(ns.symlink.Name()) -} - -func isSymbolicLink(path string) (bool, error) { - fi, err := os.Lstat(path) - if err != nil { - return false, err - } - - return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil -} - -func netNsGet(nspath, name string) (*sandboxNetNs, error) { - if err := ns.IsNSorErr(nspath); err != nil { - return nil, errSandboxClosedNetNS - } - - symlink, symlinkErr := isSymbolicLink(nspath) - if symlinkErr != nil { - return nil, symlinkErr - } - - var resolvedNsPath string - if symlink { - path, err := os.Readlink(nspath) - if err != nil { - return nil, err - } - resolvedNsPath = path - } else { - resolvedNsPath = nspath - } - - netNS, err := ns.GetNS(resolvedNsPath) - if err != nil { - return nil, err - } - - netNs := &sandboxNetNs{ns: netNS, closed: false, restored: true} - - if symlink { - fd, err := os.Open(nspath) - if err != nil { - return nil, err - } - - netNs.symlink = fd - } else { - if err := netNs.symlinkCreate(name); err != nil { - return nil, err - } - } - - return netNs, nil -} - -func hostNetNsPath() (string, error) { - netNS, err := ns.GetCurrentNS() - if err != nil { - return "", err - } - - defer netNS.Close() - return netNS.Path(), nil -} - -type sandbox struct { - id string - name string - logDir string - labels fields.Set - annotations map[string]string - infraContainer *oci.Container - containers oci.Store - processLabel string - mountLabel string - netns *sandboxNetNs - metadata *pb.PodSandboxMetadata - shmPath string - cgroupParent string - privileged bool - resolvPath string - hostname string -} - -const ( - podDefaultNamespace = "default" - defaultShmSize = 64 * 1024 * 1024 - nsRunDir = "/var/run/netns" - podInfraCommand = "/pause" -) - -var ( - errSandboxIDEmpty = errors.New("PodSandboxId should not be empty") - errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed") -) - -func (s *sandbox) addContainer(c *oci.Container) { - s.containers.Add(c.Name(), c) -} - -func (s *sandbox) getContainer(name string) *oci.Container { - return s.containers.Get(name) -} - -func (s *sandbox) removeContainer(c *oci.Container) { - s.containers.Delete(c.Name()) -} - -func (s *sandbox) netNs() ns.NetNS { - if s.netns == nil { - return nil - } - - return s.netns.ns -} - -func (s *sandbox) netNsPath() string { - if s.netns == nil { - return "" - } - - return s.netns.symlink.Name() -} - -func (s *sandbox) netNsCreate() error { - if s.netns != nil { - return fmt.Errorf("net NS already created") - } - - netNS, err := ns.NewNS() - if err != nil { - return err - } - - s.netns = &sandboxNetNs{ - ns: netNS, - closed: false, - } - - if err := s.netns.symlinkCreate(s.name); err != nil { - logrus.Warnf("Could not create nentns symlink %v", err) - - if err1 := s.netns.ns.Close(); err1 != nil { - return err1 - } - - return err - } - - return nil -} - -func (s *sandbox) netNsRemove() error { - if s.netns == nil { - logrus.Warn("no networking namespace") - return nil - } - - s.netns.Lock() - defer s.netns.Unlock() - - if s.netns.closed { - // netNsRemove() can be called multiple - // times without returning an error. - return nil - } - - if err := s.netns.symlinkRemove(); err != nil { - return err - } - - if err := s.netns.ns.Close(); err != nil { - return err - } - - if s.netns.restored { - if err := unix.Unmount(s.netns.ns.Path(), unix.MNT_DETACH); err != nil { - return err - } - - if err := os.RemoveAll(s.netns.ns.Path()); err != nil { - return err - } - } - - s.netns.closed = true - return nil -} - -func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) { - var ( - err error - id = stringid.GenerateNonCryptoID() - ) - if namespace == "" { - namespace = podDefaultNamespace - } - - if name, err = s.reservePodName(id, fmt.Sprintf("%s-%s-%v", namespace, name, attempt)); err != nil { - return "", "", err - } - return id, name, err -} - -func (s *Server) getPodSandboxFromRequest(podSandboxID string) (*sandbox, error) { - if podSandboxID == "" { - return nil, errSandboxIDEmpty - } - - sandboxID, err := s.podIDIndex.Get(podSandboxID) - if err != nil { - return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", podSandboxID, err) - } - - sb := s.getSandbox(sandboxID) - if sb == nil { - return nil, fmt.Errorf("specified pod sandbox not found: %s", sandboxID) - } - return sb, nil -} diff --git a/server/sandbox/sandbox.go b/server/sandbox/sandbox.go new file mode 100644 index 00000000..ad934d86 --- /dev/null +++ b/server/sandbox/sandbox.go @@ -0,0 +1,396 @@ +package sandbox + +import ( + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/Sirupsen/logrus" + "github.com/containernetworking/cni/pkg/ns" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/sys/unix" + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +type sandboxNetNs struct { + sync.Mutex + ns ns.NetNS + symlink *os.File + closed bool + restored bool +} + +func (ns *sandboxNetNs) symlinkCreate(name string) error { + b := make([]byte, 4) + _, randErr := rand.Reader.Read(b) + if randErr != nil { + return randErr + } + + nsName := fmt.Sprintf("%s-%x", name, b) + symlinkPath := filepath.Join(nsRunDir, nsName) + + if err := os.Symlink(ns.ns.Path(), symlinkPath); err != nil { + return err + } + + fd, err := os.Open(symlinkPath) + if err != nil { + if removeErr := os.RemoveAll(symlinkPath); removeErr != nil { + return removeErr + } + + return err + } + + ns.symlink = fd + + return nil +} + +func (ns *sandboxNetNs) symlinkRemove() error { + if err := ns.symlink.Close(); err != nil { + return err + } + + return os.RemoveAll(ns.symlink.Name()) +} + +func isSymbolicLink(path string) (bool, error) { + fi, err := os.Lstat(path) + if err != nil { + return false, err + } + + return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil +} + +func netNsGet(nspath, name string) (*sandboxNetNs, error) { + if err := ns.IsNSorErr(nspath); err != nil { + return nil, ErrSandboxClosedNetNS + } + + symlink, symlinkErr := isSymbolicLink(nspath) + if symlinkErr != nil { + return nil, symlinkErr + } + + var resolvedNsPath string + if symlink { + path, err := os.Readlink(nspath) + if err != nil { + return nil, err + } + resolvedNsPath = path + } else { + resolvedNsPath = nspath + } + + netNS, err := ns.GetNS(resolvedNsPath) + if err != nil { + return nil, err + } + + netNs := &sandboxNetNs{ns: netNS, closed: false, restored: true} + + if symlink { + fd, err := os.Open(nspath) + if err != nil { + return nil, err + } + + netNs.symlink = fd + } else { + if err := netNs.symlinkCreate(name); err != nil { + return nil, err + } + } + + return netNs, nil +} + +// HostNetNsPath returns the path of the host's network namespace +func HostNetNsPath() (string, error) { + netNS, err := ns.GetCurrentNS() + if err != nil { + return "", err + } + + defer netNS.Close() + + return netNS.Path(), nil +} + +// Sandbox represents a single pod sandbox +type Sandbox struct { + id string + name string + logDir string + labels fields.Set + annotations map[string]string + infraContainer *oci.Container + containers oci.Store + processLabel string + mountLabel string + netns *sandboxNetNs + metadata *pb.PodSandboxMetadata + shmPath string + cgroupParent string + privileged bool + resolvPath string + hostname string +} + +const ( + // PodDefaultNamespace is the default namespace name for pods + PodDefaultNamespace = "default" + // DefaultShmSize is the default size of the SHM device for sandboxs + DefaultShmSize = 64 * 1024 * 1024 + // PodInfraCommand is the default pause command for pods + PodInfraCommand = "/pause" + nsRunDir = "/var/run/netns" +) + +var ( + // ErrSandboxIDEmpty is the error returned when an operation passes "" instead of a sandbox ID + ErrSandboxIDEmpty = errors.New("PodSandboxId should not be empty") + // ErrSandboxClosedNetNS is the error returned when a network namespace is closed and cannot be joined + ErrSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed") +) + +// New creates and populates a new sandbox +// New sandboxes have no containers, no infra container, and no network namespace associated with them. +// An infra container must be attached before the sandbox is added to the state +func New(id, name, logDir string, labels, annotations map[string]string, processLabel, mountLabel string, metadata *pb.PodSandboxMetadata, shmPath, cgroupParent string, privileged bool, resolvPath, hostname string) (*Sandbox, error) { + sb := new(Sandbox) + sb.id = id + sb.name = name + sb.logDir = logDir + sb.labels = labels + sb.annotations = annotations + sb.containers = oci.NewMemoryStore() + sb.processLabel = processLabel + sb.mountLabel = mountLabel + sb.metadata = metadata + sb.shmPath = shmPath + sb.cgroupParent = cgroupParent + sb.privileged = privileged + sb.resolvPath = resolvPath + sb.hostname = hostname + + return sb, nil +} + +// ID returns the sandbox's ID +func (s *Sandbox) ID() string { + return s.id +} + +// Name returns the sandbox's name +func (s *Sandbox) Name() string { + return s.name +} + +// LogDir returns the directory the sandbox logs to +func (s *Sandbox) LogDir() string { + return s.logDir +} + +// Labels returns the sandbox's labels +func (s *Sandbox) Labels() map[string]string { + return s.labels +} + +// Annotations returns the sandbox's annotations +func (s *Sandbox) Annotations() map[string]string { + return s.annotations +} + +// InfraContainer returns the sandbox's infrastructure container +func (s *Sandbox) InfraContainer() *oci.Container { + return s.infraContainer +} + +// Containers returns an array of all the containers in the sandbox +func (s *Sandbox) Containers() []*oci.Container { + return s.containers.List() +} + +// ProcessLabel returns the SELinux process label of the sandbox +func (s *Sandbox) ProcessLabel() string { + return s.processLabel +} + +// MountLabel returns the SELinux mount label of the sandbox +func (s *Sandbox) MountLabel() string { + return s.mountLabel +} + +// Metadata returns Kubernetes metadata associated with the sandbox +func (s *Sandbox) Metadata() *pb.PodSandboxMetadata { + return s.metadata +} + +// ShmPath returns the path to the sandbox's shared memory device +func (s *Sandbox) ShmPath() string { + return s.shmPath +} + +// CgroupParent returns the sandbox's CGroup parent +func (s *Sandbox) CgroupParent() string { + return s.cgroupParent +} + +// Privileged returns whether the sandbox can support privileged containers +func (s *Sandbox) Privileged() bool { + return s.privileged +} + +// ResolvPath returns the path to the sandbox's DNS resolver configuration +func (s *Sandbox) ResolvPath() string { + return s.resolvPath +} + +// Hostname returns the sandbox's hostname +func (s *Sandbox) Hostname() string { + return s.hostname +} + +// AddContainer adds a container to the sandbox +func (s *Sandbox) AddContainer(c *oci.Container) { + s.containers.Add(c.ID(), c) +} + +// GetContainer retrieves the container with given ID from the sandbox +// Returns nil if no such container exists +func (s *Sandbox) GetContainer(id string) *oci.Container { + return s.containers.Get(id) +} + +// RemoveContainer removes the container with given ID from the sandbox +// If no container with that ID exists in the sandbox, no action is taken +func (s *Sandbox) RemoveContainer(id string) { + s.containers.Delete(id) +} + +// SetInfraContainer sets the infrastructure container of a sandbox +// Attempts to set the infrastructure container after one is already present will throw an error +func (s *Sandbox) SetInfraContainer(infraCtr *oci.Container) error { + if s.infraContainer != nil { + return fmt.Errorf("sandbox already has an infra container") + } else if infraCtr == nil { + return fmt.Errorf("must provide non-nil infra container") + } + + s.infraContainer = infraCtr + + return nil +} + +// NetNs retrieves the network namespace of the sandbox +// If the sandbox uses the host namespace, nil is returned +func (s *Sandbox) NetNs() ns.NetNS { + if s.netns == nil { + return nil + } + + return s.netns.ns +} + +// NetNsPath returns the path to the network namespace +// If the sandbox uses the host namespace, "" is returned +func (s *Sandbox) NetNsPath() string { + if s.netns == nil { + return "" + } + + return s.netns.symlink.Name() +} + +// NetNsCreate creates a new network namespace for the sandbox +func (s *Sandbox) NetNsCreate() error { + if s.netns != nil { + return fmt.Errorf("net NS already created") + } + + netNS, err := ns.NewNS() + if err != nil { + return err + } + + s.netns = &sandboxNetNs{ + ns: netNS, + closed: false, + } + + if err := s.netns.symlinkCreate(s.name); err != nil { + logrus.Warnf("Could not create nentns symlink %v", err) + + if err1 := s.netns.ns.Close(); err1 != nil { + return err1 + } + + return err + } + + return nil +} + +// NetNsJoin attempts to join the sandbox to an existing network namespace +// This will fail if the sandbox is already part of a network namespace +func (s *Sandbox) NetNsJoin(nspath, name string) error { + if s.netns != nil { + return fmt.Errorf("sandbox already has a network namespace, cannot join another") + } + + netNS, err := netNsGet(nspath, name) + if err != nil { + return err + } + + s.netns = netNS + + return nil +} + +// NetNsRemove removes the network namespace associated with the sandbox +func (s *Sandbox) NetNsRemove() error { + if s.netns == nil { + logrus.Warn("no networking namespace") + return nil + } + + s.netns.Lock() + defer s.netns.Unlock() + + if s.netns.closed { + // netNsRemove() can be called multiple + // times without returning an error. + return nil + } + + if err := s.netns.symlinkRemove(); err != nil { + return err + } + + if err := s.netns.ns.Close(); err != nil { + return err + } + + if s.netns.restored { + if err := unix.Unmount(s.netns.ns.Path(), unix.MNT_DETACH); err != nil { + return err + } + + if err := os.RemoveAll(s.netns.ns.Path()); err != nil { + return err + } + } + + s.netns.closed = true + return nil +} diff --git a/server/sandbox_list.go b/server/sandbox_list.go index b45220cb..c9046e55 100644 --- a/server/sandbox_list.go +++ b/server/sandbox_list.go @@ -1,8 +1,11 @@ package server import ( + "fmt" + "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" "golang.org/x/net/context" "k8s.io/apimachinery/pkg/fields" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" @@ -29,32 +32,32 @@ func filterSandbox(p *pb.PodSandbox, filter *pb.PodSandboxFilter) bool { // ListPodSandbox returns a list of SandBoxes. func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (*pb.ListPodSandboxResponse, error) { logrus.Debugf("ListPodSandboxRequest %+v", req) - s.Update() var pods []*pb.PodSandbox - var podList []*sandbox - for _, sb := range s.state.sandboxes { - podList = append(podList, sb) + var podList []*sandbox.Sandbox + + sandboxes, err := s.state.GetAllSandboxes() + if err != nil { + return nil, fmt.Errorf("error retrieving sandboxes: %v", err) } + podList = append(podList, sandboxes...) + filter := req.Filter // Filter by pod id first. if filter != nil { if filter.Id != "" { - id, err := s.podIDIndex.Get(filter.Id) + sb, err := s.state.LookupSandboxByID(filter.Id) + // TODO if we return something other than a No Such Sandbox should we throw an error instead? if err != nil { - return nil, err - } - sb := s.getSandbox(id) - if sb == nil { - podList = []*sandbox{} + podList = []*sandbox.Sandbox{} } else { - podList = []*sandbox{sb} + podList = []*sandbox.Sandbox{sb} } } } for _, sb := range podList { - podInfraContainer := sb.infraContainer + podInfraContainer := sb.InfraContainer() if podInfraContainer == nil { // this can't really happen, but if it does because of a bug // it's better not to panic @@ -71,12 +74,12 @@ func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxReque } pod := &pb.PodSandbox{ - Id: sb.id, + Id: sb.ID(), CreatedAt: created, State: rStatus, - Labels: sb.labels, - Annotations: sb.annotations, - Metadata: sb.metadata, + Labels: sb.Labels(), + Annotations: sb.Annotations(), + Metadata: sb.Metadata(), } // Filter by other criteria such as state and labels. diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index bf7d18cb..1d16c77d 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -6,6 +6,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" "github.com/opencontainers/selinux/go-selinux/label" "golang.org/x/net/context" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" @@ -15,10 +16,9 @@ import ( // sandbox, they should be force deleted. func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (*pb.RemovePodSandboxResponse, error) { logrus.Debugf("RemovePodSandboxRequest %+v", req) - s.Update() sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { - if err == errSandboxIDEmpty { + if err == sandbox.ErrSandboxIDEmpty { return nil, err } @@ -27,8 +27,8 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR return resp, nil } - podInfraContainer := sb.infraContainer - containers := sb.containers.List() + podInfraContainer := sb.InfraContainer() + containers := sb.Containers() containers = append(containers, podInfraContainer) // Delete all the containers in the sandbox @@ -45,7 +45,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR } if err := s.runtime.DeleteContainer(c); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } if c == podInfraContainer { @@ -53,54 +53,42 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR } if err := s.storage.StopContainer(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } if err := s.storage.DeleteContainer(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } - s.releaseContainerName(c.Name()) - s.removeContainer(c) - if err := s.ctrIDIndex.Delete(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s from index: %v", c.Name(), sb.id, err) + if err := s.removeContainer(c); err != nil { + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } } - if err := label.ReleaseLabel(sb.processLabel); err != nil { + if err := label.ReleaseLabel(sb.ProcessLabel()); err != nil { return nil, err } // unmount the shm for the pod - if sb.shmPath != "/dev/shm" { - if err := syscall.Unmount(sb.shmPath, syscall.MNT_DETACH); err != nil { + if sb.ShmPath() != "/dev/shm" { + if err := syscall.Unmount(sb.ShmPath(), syscall.MNT_DETACH); err != nil { return nil, err } } - if err := sb.netNsRemove(); err != nil { - return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err) + if err := sb.NetNsRemove(); err != nil { + return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.ID(), err) } - s.removeContainer(podInfraContainer) - sb.infraContainer = nil + if err := s.removeSandbox(sb.ID()); err != nil { + return nil, fmt.Errorf("error removing sandbox %s: %v", sb.ID(), err) + } // Remove the files related to the sandbox - if err := s.storage.StopContainer(sb.id); err != nil { - return nil, fmt.Errorf("failed to delete sandbox container in pod sandbox %s: %v", sb.id, err) + if err := s.storage.StopContainer(sb.ID()); err != nil { + return nil, fmt.Errorf("failed to delete sandbox container in pod sandbox %s: %v", sb.ID(), err) } - if err := s.storage.RemovePodSandbox(sb.id); err != nil { - return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.id, err) - } - - s.releaseContainerName(podInfraContainer.Name()) - if err := s.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil { - return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.id, err) - } - - s.releasePodName(sb.name) - s.removeSandbox(sb.id) - if err := s.podIDIndex.Delete(sb.id); err != nil { - return nil, fmt.Errorf("failed to delete pod sandbox %s from index: %v", sb.id, err) + if err := s.storage.RemovePodSandbox(sb.ID()); err != nil { + return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.ID(), err) } resp := &pb.RemovePodSandboxResponse{} diff --git a/server/sandbox_run.go b/server/sandbox_run.go index e883a977..225ea155 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -11,6 +11,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/containers/storage/storage" "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/selinux/go-selinux/label" "golang.org/x/net/context" @@ -65,9 +66,6 @@ func (s *Server) runContainer(container *oci.Container, cgroupParent string) err // RunPodSandbox creates and runs a pod-level sandbox. func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { - s.updateLock.RLock() - defer s.updateLock.RUnlock() - logrus.Debugf("RunPodSandboxRequest %+v", req) var processLabel, mountLabel, netNsPath, resolvPath string // process req.Name @@ -88,12 +86,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, err } - defer func() { - if err != nil { - s.releasePodName(name) - } - }() - podContainer, err := s.storage.CreatePodSandbox(s.imageContext, name, id, s.config.PauseImage, "", @@ -128,7 +120,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest if podContainer.Config != nil { g.SetProcessArgs(podContainer.Config.Config.Cmd) } else { - g.SetProcessArgs([]string{podInfraCommand}) + g.SetProcessArgs([]string{sandbox.PodInfraCommand}) } } else { g.SetProcessArgs([]string{s.config.PauseCommand}) @@ -225,24 +217,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, err } - defer func() { - if err != nil { - s.releaseContainerName(containerName) - } - }() - - if err = s.ctrIDIndex.Add(id); err != nil { - return nil, err - } - - defer func() { - if err != nil { - if err2 := s.ctrIDIndex.Delete(id); err2 != nil { - logrus.Warnf("couldn't delete ctr id %s from idIndex", id) - } - } - }() - // set log path inside log directory logPath := filepath.Join(logDir, id+".log") @@ -266,36 +240,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation("ocid/resolv_path", resolvPath) g.AddAnnotation("ocid/hostname", hostname) - sb := &sandbox{ - id: id, - name: name, - logDir: logDir, - labels: labels, - annotations: annotations, - containers: oci.NewMemoryStore(), - processLabel: processLabel, - mountLabel: mountLabel, - metadata: metadata, - shmPath: shmPath, - privileged: privileged, - resolvPath: resolvPath, - hostname: hostname, - } - - defer func() { - if err != nil { - s.removeSandbox(id) - if err2 := s.podIDIndex.Delete(id); err2 != nil { - logrus.Warnf("couldn't delete pod id %s from idIndex", id) - } - } - }() - - s.addSandbox(sb) - if err = s.podIDIndex.Add(id); err != nil { - return nil, err - } - for k, v := range annotations { g.AddAnnotation(k, v) } @@ -321,9 +265,12 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } else { g.SetLinuxCgroupsPath(cgroupParent + "/" + id) - } - sb.cgroupParent = cgroupParent + } + + sb, err := sandbox.New(id, name, logDir, labels, annotations, processLabel, mountLabel, metadata, shmPath, cgroupParent, privileged, resolvPath, hostname) + if err != nil { + return nil, err } hostNetwork := req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostNetwork @@ -335,13 +282,13 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, err } - netNsPath, err = hostNetNsPath() + netNsPath, err = sandbox.HostNetNsPath() if err != nil { return nil, err } } else { // Create the sandbox network namespace - if err = sb.netNsCreate(); err != nil { + if err = sb.NetNsCreate(); err != nil { return nil, err } @@ -350,18 +297,18 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return } - if netnsErr := sb.netNsRemove(); netnsErr != nil { + if netnsErr := sb.NetNsRemove(); netnsErr != nil { logrus.Warnf("Failed to remove networking namespace: %v", netnsErr) } }() // Pass the created namespace path to the runtime - err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath()) + err = g.AddOrReplaceLinuxNamespace("network", sb.NetNsPath()) if err != nil { return nil, err } - netNsPath = sb.netNsPath() + netNsPath = sb.NetNsPath() } if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().HostPid { @@ -385,23 +332,28 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest saveOptions := generate.ExportOptions{} mountPoint, err := s.storage.StartContainer(id) if err != nil { - return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.name, id, err) + return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, name, id, err) } g.SetRootPath(mountPoint) err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions) if err != nil { - return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", sb.name, id, err) + return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", name, id, err) } if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil { - return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) + return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, annotations, nil, nil, id, false, sb.Privileged()) if err != nil { return nil, err } + if err := sb.SetInfraContainer(container); err != nil { + return nil, err + } - sb.infraContainer = container + if err = s.addSandbox(sb); err != nil { + return nil, err + } // setup the network if !hostNetwork { @@ -411,7 +363,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } } - if err = s.runContainer(container, sb.cgroupParent); err != nil { + if err = s.runContainer(container, sb.CgroupParent()); err != nil { return nil, err } @@ -461,7 +413,7 @@ func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) { if err = os.Mkdir(shmPath, 0700); err != nil { return "", err } - shmOptions := "mode=1777,size=" + strconv.Itoa(defaultShmSize) + shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize) if err = syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmOptions, mountLabel)); err != nil { return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err) diff --git a/server/sandbox_status.go b/server/sandbox_status.go index ea16802d..02cae61c 100644 --- a/server/sandbox_status.go +++ b/server/sandbox_status.go @@ -10,13 +10,12 @@ import ( // PodSandboxStatus returns the Status of the PodSandbox. func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (*pb.PodSandboxStatusResponse, error) { logrus.Debugf("PodSandboxStatusRequest %+v", req) - s.Update() sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { return nil, err } - podInfraContainer := sb.infraContainer + podInfraContainer := sb.InfraContainer() if err = s.runtime.UpdateStatus(podInfraContainer); err != nil { return nil, err } @@ -29,7 +28,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR return nil, err } podNamespace := "" - ip, err := s.netPlugin.GetContainerNetworkStatus(netNsPath, podNamespace, sb.id, podInfraContainer.Name()) + ip, err := s.netPlugin.GetContainerNetworkStatus(netNsPath, podNamespace, sb.ID(), podInfraContainer.Name()) if err != nil { // ignore the error on network status ip = "" @@ -40,7 +39,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR rStatus = pb.PodSandboxState_SANDBOX_READY } - sandboxID := sb.id + sandboxID := sb.ID() resp := &pb.PodSandboxStatusResponse{ Status: &pb.PodSandboxStatus{ Id: sandboxID, @@ -52,9 +51,9 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR }, Network: &pb.PodSandboxNetworkStatus{Ip: ip}, State: rStatus, - Labels: sb.labels, - Annotations: sb.annotations, - Metadata: sb.metadata, + Labels: sb.Labels(), + Annotations: sb.Annotations(), + Metadata: sb.Metadata(), }, } diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index e8dd61c6..22b54d21 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -14,34 +14,33 @@ import ( // sandbox, they should be force terminated. func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (*pb.StopPodSandboxResponse, error) { logrus.Debugf("StopPodSandboxRequest %+v", req) - s.Update() sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { return nil, err } podNamespace := "" - podInfraContainer := sb.infraContainer + podInfraContainer := sb.InfraContainer() netnsPath, err := podInfraContainer.NetNsPath() if err != nil { return nil, err } if _, err := os.Stat(netnsPath); err == nil { - if err2 := s.netPlugin.TearDownPod(netnsPath, podNamespace, sb.id, podInfraContainer.Name()); err2 != nil { + if err2 := s.netPlugin.TearDownPod(netnsPath, podNamespace, sb.ID(), podInfraContainer.Name()); err2 != nil { return nil, fmt.Errorf("failed to destroy network for container %s in sandbox %s: %v", - podInfraContainer.Name(), sb.id, err2) + podInfraContainer.Name(), sb.ID(), err2) } } else if !os.IsNotExist(err) { // it's ok for netnsPath to *not* exist return nil, fmt.Errorf("failed to stat netns path for container %s in sandbox %s before tearing down the network: %v", - podInfraContainer.Name(), sb.id, err) + podInfraContainer.Name(), sb.ID(), err) } // Close the sandbox networking namespace. - if err := sb.netNsRemove(); err != nil { + if err := sb.NetNsRemove(); err != nil { return nil, err } - containers := sb.containers.List() + containers := sb.Containers() containers = append(containers, podInfraContainer) for _, c := range containers { @@ -51,7 +50,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque cStatus := s.runtime.ContainerStatus(c) if cStatus.Status != oci.ContainerStateStopped { if err := s.runtime.StopContainer(c); err != nil { - return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } } } diff --git a/server/server.go b/server/server.go index 2710cc13..f1c96a65 100644 --- a/server/server.go +++ b/server/server.go @@ -4,23 +4,18 @@ import ( "encoding/json" "fmt" "io/ioutil" - "os" "path/filepath" - "sync" - "github.com/Sirupsen/logrus" "github.com/containers/image/types" sstorage "github.com/containers/storage/storage" - "github.com/docker/docker/pkg/registrar" - "github.com/docker/docker/pkg/truncindex" + "github.com/docker/docker/pkg/stringid" "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/pkg/ocicni" "github.com/kubernetes-incubator/cri-o/pkg/storage" "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/sandbox" "github.com/kubernetes-incubator/cri-o/server/seccomp" - rspec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" - pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" + "github.com/kubernetes-incubator/cri-o/server/state" ) const ( @@ -34,14 +29,8 @@ type Server struct { store sstorage.Store images storage.ImageServer storage storage.RuntimeServer - stateLock sync.Mutex - updateLock sync.RWMutex - state *serverState + state state.Store netPlugin ocicni.CNIPlugin - podNameIndex *registrar.Registrar - podIDIndex *truncindex.TruncIndex - ctrNameIndex *registrar.Registrar - ctrIDIndex *truncindex.TruncIndex imageContext *types.SystemContext seccompEnabled bool @@ -51,386 +40,6 @@ type Server struct { appArmorProfile string } -func (s *Server) loadContainer(id string) error { - config, err := s.store.GetFromContainerDirectory(id, "config.json") - if err != nil { - return err - } - var m rspec.Spec - if err = json.Unmarshal(config, &m); err != nil { - return err - } - labels := make(map[string]string) - if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil { - return err - } - name := m.Annotations["ocid/name"] - name, err = s.reserveContainerName(id, name) - if err != nil { - return err - } - - defer func() { - if err != nil { - s.releaseContainerName(name) - } - }() - - var metadata pb.ContainerMetadata - if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil { - return err - } - sb := s.getSandbox(m.Annotations["ocid/sandbox_id"]) - if sb == nil { - return fmt.Errorf("could not get sandbox with id %s, skipping", m.Annotations["ocid/sandbox_id"]) - } - - var tty bool - if v := m.Annotations["ocid/tty"]; v == "true" { - tty = true - } - containerPath, err := s.store.GetContainerRunDirectory(id) - if err != nil { - return err - } - - var img *pb.ImageSpec - image, ok := m.Annotations["ocid/image"] - if ok { - img = &pb.ImageSpec{ - Image: image, - } - } - - annotations := make(map[string]string) - if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil { - return err - } - - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged) - if err != nil { - return err - } - if err = s.runtime.UpdateStatus(ctr); err != nil { - return fmt.Errorf("error updating status for container %s: %v", ctr.ID(), err) - } - s.addContainer(ctr) - return s.ctrIDIndex.Add(id) -} - -func configNetNsPath(spec rspec.Spec) (string, error) { - for _, ns := range spec.Linux.Namespaces { - if ns.Type != rspec.NetworkNamespace { - continue - } - - if ns.Path == "" { - return "", fmt.Errorf("empty networking namespace") - } - - return ns.Path, nil - } - - return "", fmt.Errorf("missing networking namespace") -} - -func (s *Server) loadSandbox(id string) error { - config, err := s.store.GetFromContainerDirectory(id, "config.json") - if err != nil { - return err - } - var m rspec.Spec - if err = json.Unmarshal(config, &m); err != nil { - return err - } - labels := make(map[string]string) - if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil { - return err - } - name := m.Annotations["ocid/name"] - name, err = s.reservePodName(id, name) - if err != nil { - return err - } - defer func() { - if err != nil { - s.releasePodName(name) - } - }() - var metadata pb.PodSandboxMetadata - if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil { - return err - } - - processLabel, mountLabel, err := label.InitLabels(label.DupSecOpt(m.Process.SelinuxLabel)) - if err != nil { - return err - } - - annotations := make(map[string]string) - if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil { - return err - } - - privileged := m.Annotations["ocid/privileged_runtime"] == "true" - - sb := &sandbox{ - id: id, - name: name, - logDir: filepath.Dir(m.Annotations["ocid/log_path"]), - labels: labels, - containers: oci.NewMemoryStore(), - processLabel: processLabel, - mountLabel: mountLabel, - annotations: annotations, - metadata: &metadata, - shmPath: m.Annotations["ocid/shm_path"], - privileged: privileged, - resolvPath: m.Annotations["ocid/resolv_path"], - } - - // We add a netNS only if we can load a permanent one. - // Otherwise, the sandbox will live in the host namespace. - netNsPath, err := configNetNsPath(m) - if err == nil { - netNS, nsErr := netNsGet(netNsPath, sb.name) - // If we can't load the networking namespace - // because it's closed, we just set the sb netns - // pointer to nil. Otherwise we return an error. - if nsErr != nil && nsErr != errSandboxClosedNetNS { - return nsErr - } - - sb.netns = netNS - } - - s.addSandbox(sb) - - defer func() { - if err != nil { - s.removeSandbox(sb.id) - } - }() - - sandboxPath, err := s.store.GetContainerRunDirectory(id) - if err != nil { - return err - } - - cname, err := s.reserveContainerName(m.Annotations["ocid/container_id"], m.Annotations["ocid/container_name"]) - if err != nil { - return err - } - defer func() { - if err != nil { - s.releaseContainerName(cname) - } - }() - - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged) - if err != nil { - return err - } - if err = s.runtime.UpdateStatus(scontainer); err != nil { - return fmt.Errorf("error updating status for pod sandbox infra container %s: %v", scontainer.ID(), err) - } - if err = label.ReserveLabel(processLabel); err != nil { - return err - } - sb.infraContainer = scontainer - if err = s.ctrIDIndex.Add(scontainer.ID()); err != nil { - return err - } - if err = s.podIDIndex.Add(id); err != nil { - return err - } - return nil -} - -func (s *Server) restore() { - containers, err := s.store.Containers() - if err != nil && !os.IsNotExist(err) { - logrus.Warnf("could not read containers and sandboxes: %v", err) - } - pods := map[string]*storage.RuntimeContainerMetadata{} - podContainers := map[string]*storage.RuntimeContainerMetadata{} - for _, container := range containers { - metadata, err2 := s.storage.GetContainerMetadata(container.ID) - if err2 != nil { - logrus.Warnf("error parsing metadata for %s: %v, ignoring", container.ID, err2) - continue - } - if metadata.Pod { - pods[container.ID] = &metadata - } else { - podContainers[container.ID] = &metadata - } - } - for containerID, metadata := range pods { - if err = s.loadSandbox(containerID); err != nil { - logrus.Warnf("could not restore sandbox %s container %s: %v", metadata.PodID, containerID, err) - } - } - for containerID := range podContainers { - if err := s.loadContainer(containerID); err != nil { - logrus.Warnf("could not restore container %s: %v", containerID, err) - } - } -} - -// Update makes changes to the server's state (lists of pods and containers) to -// reflect the list of pods and containers that are stored on disk, possibly -// having been modified by other parties -func (s *Server) Update() { - logrus.Debugf("updating sandbox and container information") - if err := s.update(); err != nil { - logrus.Errorf("error updating sandbox and container information: %v", err) - } -} - -func (s *Server) update() error { - s.updateLock.Lock() - defer s.updateLock.Unlock() - - containers, err := s.store.Containers() - if err != nil && !os.IsNotExist(err) { - logrus.Warnf("could not read containers and sandboxes: %v", err) - return err - } - newPods := map[string]*storage.RuntimeContainerMetadata{} - oldPods := map[string]string{} - removedPods := map[string]string{} - newPodContainers := map[string]*storage.RuntimeContainerMetadata{} - oldPodContainers := map[string]string{} - removedPodContainers := map[string]string{} - for _, container := range containers { - if s.hasSandbox(container.ID) { - // FIXME: do we need to reload/update any info about the sandbox? - oldPods[container.ID] = container.ID - oldPodContainers[container.ID] = container.ID - continue - } - if s.getContainer(container.ID) != nil { - // FIXME: do we need to reload/update any info about the container? - oldPodContainers[container.ID] = container.ID - continue - } - // not previously known, so figure out what it is - metadata, err2 := s.storage.GetContainerMetadata(container.ID) - if err2 != nil { - logrus.Errorf("error parsing metadata for %s: %v, ignoring", container.ID, err2) - continue - } - if metadata.Pod { - newPods[container.ID] = &metadata - } else { - newPodContainers[container.ID] = &metadata - } - } - s.ctrIDIndex.Iterate(func(id string) { - if _, ok := oldPodContainers[id]; !ok { - // this container's ID wasn't in the updated list -> removed - removedPodContainers[id] = id - } - }) - for removedPodContainer := range removedPodContainers { - // forget this container - c := s.getContainer(removedPodContainer) - if c == nil { - logrus.Warnf("bad state when getting container removed %+v", removedPodContainer) - continue - } - s.releaseContainerName(c.Name()) - s.removeContainer(c) - if err = s.ctrIDIndex.Delete(c.ID()); err != nil { - return err - } - logrus.Debugf("forgetting removed pod container %s", c.ID()) - } - s.podIDIndex.Iterate(func(id string) { - if _, ok := oldPods[id]; !ok { - // this pod's ID wasn't in the updated list -> removed - removedPods[id] = id - } - }) - for removedPod := range removedPods { - // forget this pod - sb := s.getSandbox(removedPod) - if sb == nil { - logrus.Warnf("bad state when getting pod to remove %+v", removedPod) - continue - } - podInfraContainer := sb.infraContainer - s.releaseContainerName(podInfraContainer.Name()) - s.removeContainer(podInfraContainer) - if err = s.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil { - return err - } - sb.infraContainer = nil - s.releasePodName(sb.name) - s.removeSandbox(sb.id) - if err = s.podIDIndex.Delete(sb.id); err != nil { - return err - } - logrus.Debugf("forgetting removed pod %s", sb.id) - } - for sandboxID := range newPods { - // load this pod - if err = s.loadSandbox(sandboxID); err != nil { - logrus.Warnf("could not load new pod sandbox %s: %v, ignoring", sandboxID, err) - } else { - logrus.Debugf("loaded new pod sandbox %s", sandboxID, err) - } - } - for containerID := range newPodContainers { - // load this container - if err = s.loadContainer(containerID); err != nil { - logrus.Warnf("could not load new sandbox container %s: %v, ignoring", containerID, err) - } else { - logrus.Debugf("loaded new pod container %s", containerID, err) - } - } - return nil -} - -func (s *Server) reservePodName(id, name string) (string, error) { - if err := s.podNameIndex.Reserve(name, id); err != nil { - if err == registrar.ErrNameReserved { - id, err := s.podNameIndex.Get(name) - if err != nil { - logrus.Warnf("conflict, pod name %q already reserved", name) - return "", err - } - return "", fmt.Errorf("conflict, name %q already reserved for pod %q", name, id) - } - return "", fmt.Errorf("error reserving pod name %q", name) - } - return name, nil -} - -func (s *Server) releasePodName(name string) { - s.podNameIndex.Release(name) -} - -func (s *Server) reserveContainerName(id, name string) (string, error) { - if err := s.ctrNameIndex.Reserve(name, id); err != nil { - if err == registrar.ErrNameReserved { - id, err := s.ctrNameIndex.Get(name) - if err != nil { - logrus.Warnf("conflict, ctr name %q already reserved", name) - return "", err - } - return "", fmt.Errorf("conflict, name %q already reserved for ctr %q", name, id) - } - return "", fmt.Errorf("error reserving ctr name %s", name) - } - return name, nil -} - -func (s *Server) releaseContainerName(name string) { - s.ctrNameIndex.Release(name) -} - // Shutdown attempts to shut down the server's storage cleanly func (s *Server) Shutdown() error { _, err := s.store.Shutdown(false) @@ -463,23 +72,23 @@ func New(config *Config) (*Server, error) { if err != nil { return nil, err } - sandboxes := make(map[string]*sandbox) - containers := oci.NewMemoryStore() netPlugin, err := ocicni.InitCNI(config.NetworkDir, config.PluginDir) if err != nil { return nil, err } + // TODO: Should we put this elsewhere? Separate directory specified in the config? + state, err := state.NewFileState(filepath.Join(config.RunRoot, "ocid_state"), r) + if err != nil { + return nil, err + } s := &Server{ - runtime: r, - store: store, - images: imageService, - storage: storageRuntimeService, - netPlugin: netPlugin, - config: *config, - state: &serverState{ - sandboxes: sandboxes, - containers: containers, - }, + runtime: r, + store: store, + images: imageService, + storage: storageRuntimeService, + netPlugin: netPlugin, + config: *config, + state: state, seccompEnabled: seccomp.IsEnabled(), appArmorEnabled: apparmor.IsEnabled(), appArmorProfile: config.ApparmorProfile, @@ -502,72 +111,67 @@ func New(config *Config) (*Server, error) { } } - s.podIDIndex = truncindex.NewTruncIndex([]string{}) - s.podNameIndex = registrar.NewRegistrar() - s.ctrIDIndex = truncindex.NewTruncIndex([]string{}) - s.ctrNameIndex = registrar.NewRegistrar() s.imageContext = &types.SystemContext{ SignaturePolicyPath: config.ImageConfig.SignaturePolicyPath, } - s.restore() - - logrus.Debugf("sandboxes: %v", s.state.sandboxes) - logrus.Debugf("containers: %v", s.state.containers) return s, nil } -type serverState struct { - sandboxes map[string]*sandbox - containers oci.Store +func (s *Server) addSandbox(sb *sandbox.Sandbox) error { + return s.state.AddSandbox(sb) } -func (s *Server) addSandbox(sb *sandbox) { - s.stateLock.Lock() - s.state.sandboxes[sb.id] = sb - s.stateLock.Unlock() -} - -func (s *Server) getSandbox(id string) *sandbox { - s.stateLock.Lock() - sb := s.state.sandboxes[id] - s.stateLock.Unlock() - return sb +func (s *Server) getSandbox(id string) (*sandbox.Sandbox, error) { + return s.state.GetSandbox(id) } func (s *Server) hasSandbox(id string) bool { - s.stateLock.Lock() - _, ok := s.state.sandboxes[id] - s.stateLock.Unlock() - return ok + return s.state.HasSandbox(id) } -func (s *Server) removeSandbox(id string) { - s.stateLock.Lock() - delete(s.state.sandboxes, id) - s.stateLock.Unlock() +func (s *Server) removeSandbox(id string) error { + return s.state.DeleteSandbox(id) } -func (s *Server) addContainer(c *oci.Container) { - s.stateLock.Lock() - sandbox := s.state.sandboxes[c.Sandbox()] - // TODO(runcom): handle !ok above!!! otherwise it panics! - sandbox.addContainer(c) - s.state.containers.Add(c.ID(), c) - s.stateLock.Unlock() +func (s *Server) addContainer(c *oci.Container) error { + return s.state.AddContainer(c) } -func (s *Server) getContainer(id string) *oci.Container { - s.stateLock.Lock() - c := s.state.containers.Get(id) - s.stateLock.Unlock() - return c +func (s *Server) getContainer(id string) (*oci.Container, error) { + sbID, err := s.state.GetContainerSandbox(id) + if err != nil { + return nil, err + } + + return s.state.GetContainer(id, sbID) } -func (s *Server) removeContainer(c *oci.Container) { - s.stateLock.Lock() - sandbox := s.state.sandboxes[c.Sandbox()] - sandbox.removeContainer(c) - s.state.containers.Delete(c.ID()) - s.stateLock.Unlock() +func (s *Server) removeContainer(c *oci.Container) error { + return s.state.DeleteContainer(c.ID(), c.Sandbox()) +} + +func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + if namespace == "" { + namespace = sandbox.PodDefaultNamespace + } + + return id, fmt.Sprintf("%s-%s-%v", namespace, name, attempt), err +} + +func (s *Server) getPodSandboxFromRequest(podSandboxID string) (*sandbox.Sandbox, error) { + if podSandboxID == "" { + return nil, sandbox.ErrSandboxIDEmpty + } + + sb, err := s.state.LookupSandboxByID(podSandboxID) + if err != nil { + return nil, fmt.Errorf("could not retrieve pod sandbox with ID starting with %v: %v", podSandboxID, err) + } + + return sb, nil } diff --git a/server/state/file_state.go b/server/state/file_state.go new file mode 100644 index 00000000..69f5f735 --- /dev/null +++ b/server/state/file_state.go @@ -0,0 +1,891 @@ +package state + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/containernetworking/cni/pkg/ns" + "github.com/containers/storage/storage" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// The state is a directory on disk containing a lockfile (STATE_LOCK) and a number of sandboxes (directories) +// Each sandbox has a description file (description.json) containing basic information about it +// Containers are individual files within a sandbox's folder named by their full idea and '.json' +// JSON encoding is was used during development mainly for ease of debugging. +// For now, there is a single global lockfile. Eventually, it is desired to move toward a global lock for +// sandbox creation/deletion and additional, separate per-sandbox locks to improve performance. + +// FileState is a file-based store for CRI-O's state +// It allows multiple programs (e.g. kpod and CRI-O) to interact with the same set of containers without races +type FileState struct { + rootPath string + lockfile storage.Locker + memoryState Store + runtime *oci.Runtime +} + +// Net namespace is taken from enclosing sandbox +// State is not included at all, we assume the runtime has it +type containerFile struct { + ID string `json:"id"` + Name string `json:"name"` + BundlePath string `json:"bundlePath"` + LogPath string `json:"logPath"` + Labels fields.Set `json:"labels"` + Annotations fields.Set `json:"annotations"` + Image *pb.ImageSpec `json:"image"` + Sandbox string `json:"sandbox"` + Terminal bool `json:"terminal"` + Privileged bool `json:"privileged"` + Metadata *pb.ContainerMetadata `json:"metadata"` +} + +type sandboxFile struct { + ID string `json:"id"` + Name string `json:"name"` + LogDir string `json:"logDir"` + Labels fields.Set `json:"labels"` + Annotations map[string]string `json:"annotations"` + InfraContainer string `json:"infraContainer"` // ID of infra container + Containers []string `json:"containers"` // List of IDs + ProcessLabel string `json:"processLabel"` + MountLabel string `json:"mountLabel"` + NetNsPath string `json:"netNsPath"` + Metadata *pb.PodSandboxMetadata `json:"metadata"` + ShmPath string `json:"shmPath"` + CgroupParent string `json:"cgroupParent"` + Privileged bool `json:"privileged"` + ResolvPath string `json:"resolvPath"` + Hostname string `json:"hostname"` +} + +// Sync the in-memory state and the state on disk +// Conditional on state being modified +func (s *FileState) syncWithDisk() error { + modified, err := s.lockfile.Modified() + if err != nil { + return fmt.Errorf("file locking error: %v", err) + } else if !modified { + logrus.Debugf("on-disk state unmodified, leaving state unchanged") + // On-disk state unmodified, don't need to do anything + return nil + } + + logrus.Debugf("on-disk state modified, going to rebuild from disk") + + return s.forceSyncWithDisk() +} + +// Force an unconditional sync of on-disk and in-memory state +// Mostly just called by normal syncWithDisk(), but also to force a sync on startup +// For now, this uses a brute-force approach - when on-disk state is modified, +// throw away the in-memory state and rebuild it entirely from on-disk state +// TODO: More efficient algorithm, only replacing sandboxes if they've been modified +// (Might need to retain this implementation for initial sync) +func (s *FileState) forceSyncWithDisk() error { + // Get a list of all directories under the root path - each should be a sandbox + dirListing, err := ioutil.ReadDir(s.rootPath) + if err != nil { + return fmt.Errorf("error listing contents of root path: %v", err) + } + + newState := NewInMemoryState() + + // Loop through contents of the root directory, transforming all directories into sandboxes + for _, file := range dirListing { + if !file.IsDir() { + continue + } + + // The folder's name should be the sandbox ID + sandbox, err := s.getSandboxFromDisk(file.Name()) + if err != nil { + return err + } + + if err := newState.AddSandbox(sandbox); err != nil { + return fmt.Errorf("error populating new state: %v", err) + } + + logrus.Debugf("synced sandbox %s from disk", sandbox.ID()) + } + + // Loop through the old state, looking for removed sandboxes + // Close their network namespaces to make sure we don't leak FDs + // (We can assume that whoever removed them did the rest of the cleanup) + oldSandboxes, err := s.memoryState.GetAllSandboxes() + if err != nil { + return fmt.Errorf("error retrieving old sandboxes to close netns: %v", err) + } + for _, sb := range oldSandboxes { + if !newState.HasSandbox(sb.ID()) && sb.NetNs() != nil { + if err := sb.NetNsRemove(); err != nil { + return fmt.Errorf("error closing network namespace of sandbox %v: %v", sb.ID(), err) + } + } + } + + s.memoryState = newState + + logrus.Debugf("successfully rebuilt state from disk") + + return nil +} + +// Convert a sandbox to on-disk format +func sandboxToSandboxFile(sb *sandbox.Sandbox) *sandboxFile { + sbFile := sandboxFile{ + ID: sb.ID(), + Name: sb.Name(), + LogDir: sb.LogDir(), + Labels: sb.Labels(), + Annotations: sb.Annotations(), + Containers: make([]string, 0, len(sb.Containers())), + ProcessLabel: sb.ProcessLabel(), + MountLabel: sb.MountLabel(), + Metadata: sb.Metadata(), + ShmPath: sb.ShmPath(), + CgroupParent: sb.CgroupParent(), + Privileged: sb.Privileged(), + ResolvPath: sb.ResolvPath(), + Hostname: sb.Hostname(), + } + + netNs := sb.NetNs() + if netNs != nil { + sbFile.NetNsPath = netNs.Path() + } + + for _, ctr := range sb.Containers() { + sbFile.Containers = append(sbFile.Containers, ctr.ID()) + } + + sbFile.InfraContainer = sb.InfraContainer().ID() + + return &sbFile +} + +// Convert a sandbox from on-disk format to normal format +func (s *FileState) sandboxFileToSandbox(sbFile *sandboxFile) (*sandbox.Sandbox, error) { + sb, err := sandbox.New(sbFile.ID, sbFile.Name, sbFile.LogDir, sbFile.Labels, sbFile.Annotations, sbFile.ProcessLabel, sbFile.MountLabel, sbFile.Metadata, sbFile.ShmPath, sbFile.CgroupParent, sbFile.Privileged, sbFile.ResolvPath, sbFile.Hostname) + if err != nil { + return nil, fmt.Errorf("error creating sandbox with ID %v: %v", sbFile.ID, err) + } + + if sbFile.NetNsPath != "" { + // TODO: Should we error out on ErrSandboxClosedNetNS? + if err := sb.NetNsJoin(sbFile.NetNsPath, sbFile.Name); err != nil { + if err != sandbox.ErrSandboxClosedNetNS { + return nil, fmt.Errorf("error joining network NS %v for sandbox ID %v: %v", sbFile.NetNsPath, sbFile.ID, err) + } + logrus.Debugf("error opening network namespace %v for sandbox %s - namespace is closed", sbFile.NetNsPath, sbFile.ID) + } + } + + logrus.Debugf("got network namespace for sandbox %s: %v", sbFile.ID, sb.NetNsPath()) + + infraCtr, err := s.getContainerFromDisk(sbFile.InfraContainer, sbFile.ID, sb.NetNs()) + if err != nil { + return nil, fmt.Errorf("error retrieving infra container for pod %v: %v", sbFile.ID, err) + } + if err := sb.SetInfraContainer(infraCtr); err != nil { + return nil, fmt.Errorf("error setting infra container for pod %v: %v", sbFile.ID, err) + } + + for _, id := range sbFile.Containers { + ctr, err := s.getContainerFromDisk(id, sbFile.ID, sb.NetNs()) + if err != nil { + return nil, fmt.Errorf("error retrieving container ID %v in pod ID %v: %v", id, sbFile.ID, err) + } + sb.AddContainer(ctr) + } + + return sb, nil +} + +// Retrieve a sandbox and all associated containers from disk +func (s *FileState) getSandboxFromDisk(id string) (*sandbox.Sandbox, error) { + sbFile, err := s.getSandboxFileFromDisk(id) + if err != nil { + return nil, err + } + + logrus.Debugf("Loading sandbox from disk: %+v", sbFile) + + return s.sandboxFileToSandbox(sbFile) +} + +// Retrieve a sandbox file from disk +func (s *FileState) getSandboxFileFromDisk(id string) (*sandboxFile, error) { + sbExists, err := s.checkSandboxExistsOnDisk(id) + if err != nil { + return nil, err + } else if !sbExists { + return nil, fmt.Errorf("sandbox with ID %v does not exist on disk", id) + } + + _, descriptionFilePath := s.getSandboxPath(id) + sbFile := sandboxFile{} + + if err = decodeFromFile(descriptionFilePath, &sbFile); err != nil { + return nil, fmt.Errorf("error retrieving sandbox %v from disk: %v", id, err) + } + + return &sbFile, err +} + +// Save a sandbox to disk +// Will save all associated containers, including infra container, as well +// TODO: This is not in any way atomic. This should be remedied. +func (s *FileState) putSandboxToDisk(sb *sandbox.Sandbox) error { + sbFile := sandboxToSandboxFile(sb) + + logrus.Debugf("Syncing sandbox to disk: %+v", sbFile) + + if err := s.putSandboxFileToDisk(sbFile); err != nil { + return err + } + + // Need to put infra container and any additional containers to disk as well + if err := s.putContainerToDisk(sb.InfraContainer(), false); err != nil { + return fmt.Errorf("error storing sandbox %v infra container: %v", sb.ID(), err) + } + + for _, ctr := range sb.Containers() { + if err := s.putContainerToDisk(ctr, false); err != nil { + return fmt.Errorf("error storing container %v in sandbox %v: %v", ctr.ID(), sb.ID(), err) + } + } + + return nil +} + +// Save a sandbox file to disk +// If sandbox already exists on disk, will cowardly refuse to replace it +func (s *FileState) putSandboxFileToDisk(sbFile *sandboxFile) error { + sbExists, err := s.checkSandboxExistsOnDisk(sbFile.ID) + if err != nil { + return err + } else if sbExists { + return fmt.Errorf("sandbox with ID %v already exists on disk, cowardly refusing to replace", sbFile.ID) + } + + folderPath, filePath := s.getSandboxPath(sbFile.ID) + + // Make the folder first + if err := os.Mkdir(folderPath, 0700); err != nil { + return fmt.Errorf("error creating folder for sandbox ID %v: %v", sbFile.ID, err) + } + + // Then encode the sandbox description data to disk + if err := encodeToFile(filePath, sbFile); err != nil { + if err2 := os.RemoveAll(folderPath); err2 != nil { + logrus.Errorf("error removing incomplete sandbox %v: %v", sbFile.ID, err2) + } + + return fmt.Errorf("error encoding sandbox ID %v description data to disk: %v", sbFile.ID, err) + } + + if err := s.lockfile.Touch(); err != nil { + logrus.Errorf("error updating lockfile writer: %v", err) + } + + return nil +} + +// Update a sandbox's description file on disk (e.g. to add/remove a container from state) +func (s *FileState) updateSandboxFileOnDisk(sbFileNew *sandboxFile) error { + sbExists, err := s.checkSandboxExistsOnDisk(sbFileNew.ID) + if err != nil { + return err + } else if !sbExists { + return fmt.Errorf("cannot update sandbox ID %v as it does not exist on disk", sbFileNew.ID) + } + + // Delete the existing sandbox description file first + _, sbFilePath := s.getSandboxPath(sbFileNew.ID) + if err := os.Remove(sbFilePath); err != nil { + return fmt.Errorf("error removing sandbox file to update sandbox %v: %v", sbFileNew.ID, err) + } + + if err := encodeToFile(sbFilePath, sbFileNew); err != nil { + return err + } + + if err := s.lockfile.Touch(); err != nil { + logrus.Errorf("error updating lockfile writer: %v", err) + } + + return nil +} + +// Remove a sandbox from disk +// TODO: maybe remove the description file first, to ensure we don't have a potentially valid sandbox at the end? +func (s *FileState) removeSandboxFromDisk(id string) error { + sbExists, err := s.checkSandboxExistsOnDisk(id) + if err != nil { + return err + } else if !sbExists { + return fmt.Errorf("cannot remove sandbox ID %v as it does not exist on disk", id) + } + + sbDir, _ := s.getSandboxPath(id) + + if err := s.lockfile.Touch(); err != nil { + logrus.Errorf("error updating lockfile writer: %v", err) + } + + if err := os.RemoveAll(sbDir); err != nil { + return fmt.Errorf("error removing sandbox ID %v: %v", id, err) + } + + return nil +} + +// Check if a sandbox exists on disk and is sanely formatted +// Does not validate sandbox description data +func (s *FileState) checkSandboxExistsOnDisk(id string) (bool, error) { + folderPath, filePath := s.getSandboxPath(id) + + folderStat, err := os.Stat(folderPath) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, fmt.Errorf("error accessing sandbox folder %v: %v", folderPath, err) + } + + if !folderStat.IsDir() { + return false, fmt.Errorf("sandbox folder %v is not a folder", folderPath) + } + + // Don't need to IsNotExist check here - a sandbox folder without a description file is unusable + // So any error is bad + if _, err := os.Stat(filePath); err != nil { + return false, fmt.Errorf("sandbox folder %v exists but description file %v cannot be accessed: %v", folderPath, filePath, err) + } + + return true, nil +} + +// Get path of a sandbox on disk +// Returns two strings: the first is the path of the sandbox's folder, the second the sandbox's JSON description file +func (s *FileState) getSandboxPath(id string) (string, string) { + folderPath := path.Join(s.rootPath, id) + filePath := path.Join(folderPath, "description.json") + + return folderPath, filePath +} + +// Convert oci.Container to on-disk container format +func getContainerFileFromContainer(ctr *oci.Container) *containerFile { + ctrFile := containerFile{ + ID: ctr.ID(), + Name: ctr.Name(), + BundlePath: ctr.BundlePath(), + LogPath: ctr.LogPath(), + Labels: ctr.Labels(), + Annotations: ctr.Annotations(), + Image: ctr.Image(), + Sandbox: ctr.Sandbox(), + Terminal: ctr.Terminal(), + Privileged: ctr.Privileged(), + Metadata: ctr.Metadata(), + } + + return &ctrFile +} + +// Convert on-disk container format to normal oci.Container +func getContainerFromContainerFile(ctrFile *containerFile, ns ns.NetNS) (*oci.Container, error) { + return oci.NewContainer(ctrFile.ID, ctrFile.Name, ctrFile.BundlePath, ctrFile.LogPath, ns, ctrFile.Labels, ctrFile.Annotations, ctrFile.Image, ctrFile.Metadata, ctrFile.Sandbox, ctrFile.Terminal, ctrFile.Privileged) +} + +// Get a container from disk +func (s *FileState) getContainerFromDisk(id, sandboxID string, netNs ns.NetNS) (*oci.Container, error) { + ctrFile, err := s.getContainerFileFromDisk(id, sandboxID) + if err != nil { + return nil, err + } + + ctr, err := getContainerFromContainerFile(ctrFile, netNs) + if err != nil { + return nil, err + } + + // Sync the container with the runtime to get current state + if err := s.runtime.UpdateStatus(ctr); err != nil { + return nil, fmt.Errorf("error getting status of container %s: %v", ctr.ID(), err) + } + + return ctr, nil +} + +// Retrieve a container file from disk +func (s *FileState) getContainerFileFromDisk(id, sandboxID string) (*containerFile, error) { + ctrExists, err := s.checkContainerExistsOnDisk(id, sandboxID) + if err != nil { + return nil, err + } else if !ctrExists { + return nil, fmt.Errorf("container with ID %v in sandbox %v does not exist", id, sandboxID) + } + + ctrPath := s.getContainerPath(id, sandboxID) + ctrFile := containerFile{} + + if err := decodeFromFile(ctrPath, &ctrFile); err != nil { + return nil, fmt.Errorf("error retrieving containder ID %v from disk: %v", id, err) + } + + return &ctrFile, nil +} + +// Store a container on disk +// Cowardly refuses to replace containers that already exist on disk +// If parameter is set to true, will also update associated sandbox with new container +func (s *FileState) putContainerToDisk(ctr *oci.Container, updateSandbox bool) error { + ctrFile := getContainerFileFromContainer(ctr) + + if updateSandbox { + sbFile, err := s.getSandboxFileFromDisk(ctrFile.Sandbox) + if err != nil { + return err + } + + sbFile.Containers = append(sbFile.Containers, ctrFile.ID) + + if err := s.updateSandboxFileOnDisk(sbFile); err != nil { + return err + } + } + + return s.putContainerFileToDisk(ctrFile) +} + +// Put a container file to disk +// Will throw an error if a container with that ID already exists on disk +// Does not update associated sandbox +func (s *FileState) putContainerFileToDisk(ctrFile *containerFile) error { + ctrExists, err := s.checkContainerExistsOnDisk(ctrFile.ID, ctrFile.Sandbox) + if err != nil { + return err + } else if ctrExists { + return fmt.Errorf("container with ID %v already exists on disk, cowardly refusing to replace", ctrFile.ID) + } + + ctrPath := s.getContainerPath(ctrFile.ID, ctrFile.Sandbox) + + if err := encodeToFile(ctrPath, ctrFile); err != nil { + return fmt.Errorf("error storing container with ID %v: %v", ctrFile.ID, err) + } + + if err := s.lockfile.Touch(); err != nil { + logrus.Errorf("error updating lockfile writer: %v", err) + } + + return nil +} + +// Remove a container from disk, updating sandbox to remove references to it +func (s *FileState) removeContainerFromDisk(id, sandboxID string) error { + ctrExists, err := s.checkContainerExistsOnDisk(id, sandboxID) + if err != nil { + return err + } else if !ctrExists { + return fmt.Errorf("cannot remove container ID %v from sandbox %v as it does not exist", id, sandboxID) + } + + // Load, update, and store the sandbox descriptor file to reflect removed container + sbFile, err := s.getSandboxFileFromDisk(sandboxID) + if err != nil { + return err + } + + foundID := false + newCtrs := make([]string, 0, len(sbFile.Containers)) + for _, ctrID := range sbFile.Containers { + if ctrID == id { + foundID = true + } else { + newCtrs = append(newCtrs, ctrID) + } + } + + if !foundID { + return fmt.Errorf("error updating sandbox %v to remove container %v: container not found in sandbox containers listing", sandboxID, id) + } + + sbFile.Containers = newCtrs + + if err := s.updateSandboxFileOnDisk(sbFile); err != nil { + return err + } + + // Now remove container file + ctrPath := s.getContainerPath(id, sandboxID) + + if err := os.Remove(ctrPath); err != nil { + return fmt.Errorf("error removing container %v in sandbox %v: %v", id, sandboxID, err) + } + + return nil +} + +// Check if given container exists in given sandbox +func (s *FileState) checkContainerExistsOnDisk(id, sandboxID string) (bool, error) { + sbExists, err := s.checkSandboxExistsOnDisk(sandboxID) + if err != nil { + return false, fmt.Errorf("error checking sandbox %v: %v", sandboxID, err) + } else if !sbExists { + return false, nil + } + + ctrPath := s.getContainerPath(id, sandboxID) + if _, err := os.Stat(ctrPath); err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, fmt.Errorf("stat error on container file %v: %v", ctrPath, err) + } + + return true, nil +} + +// Get path of file representing a single container +func (s *FileState) getContainerPath(id, sandboxID string) string { + return path.Join(s.rootPath, sandboxID, (id + ".json")) +} + +// Encode given struct into a file with the given name +// Will refuse to replace files that already exist +func encodeToFile(fileName string, toEncode interface{}) error { + // Open file for writing + file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600) + if err != nil { + return fmt.Errorf("could not open file %v for writing: %v", fileName, err) + } + defer file.Close() + + encoder := json.NewEncoder(file) + encoder.SetEscapeHTML(false) + encoder.SetIndent("", "") + if err := encoder.Encode(toEncode); err != nil { + return fmt.Errorf("error encoding & storing struct: %v", err) + } + + return nil +} + +// Decode a single JSON structure (if multiple are present, the first will be used) from given file into given struct +func decodeFromFile(fileName string, decodeInto interface{}) error { + file, err := os.Open(fileName) + if err != nil { + return fmt.Errorf("error opening data file %v: %v", fileName, err) + } + defer file.Close() + + decoder := json.NewDecoder(file) + if err := decoder.Decode(decodeInto); err != nil { + return fmt.Errorf("error decoding contents of file %v: %v", fileName, err) + } + + return nil +} + +// Public API + +// NewFileState makes a new file-based state store at the given directory +func NewFileState(statePath string, runtime *oci.Runtime) (Store, error) { + state := new(FileState) + state.rootPath = statePath + state.memoryState = NewInMemoryState() + state.runtime = runtime + + if runtime == nil { + return nil, fmt.Errorf("must pass non-nil runtime to create state") + } + + // Make the root path if it does not exist + pathStat, err := os.Stat(statePath) + if err != nil { + if os.IsNotExist(err) { + if err2 := os.Mkdir(statePath, 0700); err2 != nil { + return nil, fmt.Errorf("unable to make root path directory %v: %v", statePath, err2) + } + } else { + return nil, fmt.Errorf("unable to stat root path of state: %v", err) + } + } else if !pathStat.IsDir() { + return nil, fmt.Errorf("root path %v already exists and is not a directory", statePath) + } + + // Retrieve the lockfile + lockfilePath := path.Join(statePath, "STATE_LOCK") + + lockfile, err := storage.GetLockfile(lockfilePath) + if err != nil { + return nil, fmt.Errorf("error retrieving lock: %v", err) + } + + state.lockfile = lockfile + + state.lockfile.Lock() + defer state.lockfile.Unlock() + + // Check if the lockfile is fresh + // If it is (Modified returns ENOSPC as there is no writer present), make us the writer + if _, err := lockfile.Modified(); err != nil { + if err == syscall.ENOSPC { + if err2 := lockfile.Touch(); err2 != nil { + return nil, fmt.Errorf("error adding writer to lockfile :%v", err2) + } + } else { + return nil, fmt.Errorf("error checking if lockfile modified: %v", err) + } + } + + // Perform an initial sync with the disk + if err := state.forceSyncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return state, nil +} + +// AddSandbox adds a sandbox and any containers in it to the state +func (s *FileState) AddSandbox(sb *sandbox.Sandbox) error { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return errSyncWithDisk(err) + } + + if err := s.memoryState.AddSandbox(sb); err != nil { + return err + } + + if err := s.putSandboxToDisk(sb); err != nil { + if err2 := s.memoryState.DeleteSandbox(sb.ID()); err2 != nil { + logrus.Errorf("error removing sandbox %s from in-memory state, states are desynced: %v", sb.ID(), err) + } + return fmt.Errorf("error saving sandbox %s to disk: %v", sb.ID(), err) + } + + return nil +} + +// HasSandbox checks if a sandbox exists in the state +func (s *FileState) HasSandbox(id string) bool { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + // TODO: maybe this function should return an error so we can better handle this? + if err := s.syncWithDisk(); err != nil { + return false + } + + return s.memoryState.HasSandbox(id) +} + +// DeleteSandbox removes the given sandbox from the state +// TODO make atomic +func (s *FileState) DeleteSandbox(id string) error { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return errSyncWithDisk(err) + } + + if err := s.memoryState.DeleteSandbox(id); err != nil { + return err + } + + if err := s.removeSandboxFromDisk(id); err != nil { + return fmt.Errorf("error removing sandbox ID %s from disk: %v", id, err) + } + + return nil +} + +// GetSandbox retrieves the given sandbox from the state +func (s *FileState) GetSandbox(id string) (*sandbox.Sandbox, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.GetSandbox(id) +} + +// LookupSandboxByName returns a sandbox given its full or partial name +func (s *FileState) LookupSandboxByName(name string) (*sandbox.Sandbox, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.LookupSandboxByName(name) +} + +// LookupSandboxByID returns a sandbox given its full or partial ID +// An error will be returned if the partial ID given is not unique +func (s *FileState) LookupSandboxByID(id string) (*sandbox.Sandbox, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.LookupSandboxByID(id) +} + +// GetAllSandboxes returns all sandboxes in the state +func (s *FileState) GetAllSandboxes() ([]*sandbox.Sandbox, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.GetAllSandboxes() +} + +// AddContainer adds a container to the state +func (s *FileState) AddContainer(c *oci.Container) error { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return errSyncWithDisk(err) + } + + if err := s.memoryState.AddContainer(c); err != nil { + return err + } + + if err := s.putContainerToDisk(c, true); err != nil { + if err2 := s.memoryState.DeleteContainer(c.ID(), c.Sandbox()); err2 != nil { + logrus.Errorf("error removing container %v from in-memory state, states are desynced: %v", c.ID(), err2) + } + return fmt.Errorf("error adding container %s to on-disk state: %v", c.ID(), err) + } + + return nil +} + +// HasContainer checks if a container exists in a given sandbox +func (s *FileState) HasContainer(id, sandboxID string) bool { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + // TODO: Should return (bool, error) to better represent this error? Sync failure is serious + if err := s.syncWithDisk(); err != nil { + return false + } + + return s.memoryState.HasContainer(id, sandboxID) +} + +// DeleteContainer removes a container from a given sandbox in the state +// TODO make atomic +func (s *FileState) DeleteContainer(id, sandboxID string) error { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return errSyncWithDisk(err) + } + + if err := s.memoryState.DeleteContainer(id, sandboxID); err != nil { + return err + } + + if err := s.removeContainerFromDisk(id, sandboxID); err != nil { + return fmt.Errorf("error removing container %s from on-disk state: %v", id, err) + } + + return nil +} + +// GetContainer retrieves the container with given ID from the given sandbox +func (s *FileState) GetContainer(id, sandboxID string) (*oci.Container, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.GetContainer(id, sandboxID) +} + +// GetContainerSandbox retrieves the sandbox of the container with given ID +func (s *FileState) GetContainerSandbox(id string) (string, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return "", errSyncWithDisk(err) + } + + return s.memoryState.GetContainerSandbox(id) +} + +// LookupContainerByName returns the full ID of a container given its full or partial name +func (s *FileState) LookupContainerByName(name string) (*oci.Container, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.LookupContainerByName(name) +} + +// LookupContainerByID returns the full ID of a container given a full or partial ID +// If the given ID is not unique, an error is returned +func (s *FileState) LookupContainerByID(id string) (*oci.Container, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.LookupContainerByID(id) +} + +// GetAllContainers returns all containers in the state, regardless of which sandbox they belong to +// Pod Infra containers are not included +func (s *FileState) GetAllContainers() ([]*oci.Container, error) { + s.lockfile.Lock() + defer s.lockfile.Unlock() + + if err := s.syncWithDisk(); err != nil { + return nil, errSyncWithDisk(err) + } + + return s.memoryState.GetAllContainers() +} + +func errSyncWithDisk(err error) error { + return fmt.Errorf("error syncing on-disk state: %v", err) +} diff --git a/server/state/in_memory_state.go b/server/state/in_memory_state.go new file mode 100644 index 00000000..b2d8d798 --- /dev/null +++ b/server/state/in_memory_state.go @@ -0,0 +1,389 @@ +package state + +import ( + "fmt" + "sync" + + "github.com/docker/docker/pkg/registrar" + "github.com/docker/docker/pkg/truncindex" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" +) + +// TODO: make operations atomic to greatest extent possible + +// InMemoryState is an in-memory state store, suitable for use when no other +// programs are expected to interact with the server +type InMemoryState struct { + lock sync.Mutex + sandboxes map[string]*sandbox.Sandbox + containers oci.Store + podNameIndex *registrar.Registrar + podIDIndex *truncindex.TruncIndex + ctrNameIndex *registrar.Registrar + ctrIDIndex *truncindex.TruncIndex +} + +// NewInMemoryState creates a new, empty server state +func NewInMemoryState() Store { + state := new(InMemoryState) + state.sandboxes = make(map[string]*sandbox.Sandbox) + state.containers = oci.NewMemoryStore() + state.podNameIndex = registrar.NewRegistrar() + state.podIDIndex = truncindex.NewTruncIndex([]string{}) + state.ctrNameIndex = registrar.NewRegistrar() + state.ctrIDIndex = truncindex.NewTruncIndex([]string{}) + + return state +} + +// AddSandbox adds a sandbox and any containers in it to the state +func (s *InMemoryState) AddSandbox(sandbox *sandbox.Sandbox) error { + s.lock.Lock() + defer s.lock.Unlock() + + if sandbox == nil { + return fmt.Errorf("nil passed as sandbox to AddSandbox") + } + + if _, exist := s.sandboxes[sandbox.ID()]; exist { + return fmt.Errorf("sandbox with ID %v already exists", sandbox.ID()) + } + + // We shouldn't share ID with any containers, either + // Our pod infra container will share our ID and we don't want it to conflict with anything + if ctrCheck := s.containers.Get(sandbox.ID()); ctrCheck != nil { + return fmt.Errorf("requested sandbox ID %v conflicts with existing container ID", sandbox.ID()) + } + + s.sandboxes[sandbox.ID()] = sandbox + if err := s.podNameIndex.Reserve(sandbox.Name(), sandbox.ID()); err != nil { + return fmt.Errorf("error registering sandbox name: %v", err) + } + if err := s.podIDIndex.Add(sandbox.ID()); err != nil { + return fmt.Errorf("error registering sandbox ID: %v", err) + } + + // If there are containers in the sandbox add them to the mapping + containers := sandbox.Containers() + for _, ctr := range containers { + if err := s.addContainerMappings(ctr, true); err != nil { + return fmt.Errorf("error adding container %v mappings in sandbox %v", ctr.ID(), sandbox.ID()) + } + } + + // Add the pod infrastructure container to mappings + // TODO: Right now, we don't add it to the all containers listing. We may want to change this. + if err := s.addContainerMappings(sandbox.InfraContainer(), false); err != nil { + return fmt.Errorf("error adding infrastructure container %v to mappings: %v", sandbox.InfraContainer().ID(), err) + } + + return nil +} + +// HasSandbox determines if a given sandbox exists in the state +func (s *InMemoryState) HasSandbox(id string) bool { + s.lock.Lock() + defer s.lock.Unlock() + + _, exist := s.sandboxes[id] + + return exist +} + +// DeleteSandbox removes a sandbox from the state +func (s *InMemoryState) DeleteSandbox(id string) error { + s.lock.Lock() + defer s.lock.Unlock() + + if _, exist := s.sandboxes[id]; !exist { + return errNoSuchSandbox(id) + } + + name := s.sandboxes[id].Name() + containers := s.sandboxes[id].Containers() + infraContainer := s.sandboxes[id].InfraContainer() + + delete(s.sandboxes, id) + s.podNameIndex.Release(name) + if err := s.podIDIndex.Delete(id); err != nil { + return fmt.Errorf("error unregistering sandbox ID: %v", err) + } + + // If there are containers left in the sandbox delete them from the mappings + for _, ctr := range containers { + if err := s.deleteContainerMappings(ctr, true); err != nil { + return fmt.Errorf("error removing container %v mappings: %v", ctr.ID(), err) + } + } + + // Delete infra container from mappings + if err := s.deleteContainerMappings(infraContainer, false); err != nil { + return fmt.Errorf("error removing infra container %v from mappings: %v", infraContainer.ID(), err) + } + + return nil +} + +// GetSandbox returns a sandbox given its full ID +func (s *InMemoryState) GetSandbox(id string) (*sandbox.Sandbox, error) { + s.lock.Lock() + defer s.lock.Unlock() + + sandbox, ok := s.sandboxes[id] + if !ok { + return nil, errNoSuchSandbox(id) + } + + return sandbox, nil +} + +// LookupSandboxByName returns a sandbox given its full or partial name +func (s *InMemoryState) LookupSandboxByName(name string) (*sandbox.Sandbox, error) { + s.lock.Lock() + defer s.lock.Unlock() + + id, err := s.podNameIndex.Get(name) + if err != nil { + return nil, fmt.Errorf("could not resolve sandbox name %v: %v", name, err) + } + + sandbox, ok := s.sandboxes[id] + if !ok { + // This should never happen - our internal state must be desynced + return nil, fmt.Errorf("cannot find sandbox %v in sandboxes map", id) + } + + return sandbox, nil +} + +// LookupSandboxByID returns a sandbox given its full or partial ID +// An error will be returned if the partial ID given is not unique +func (s *InMemoryState) LookupSandboxByID(id string) (*sandbox.Sandbox, error) { + s.lock.Lock() + defer s.lock.Unlock() + + fullID, err := s.podIDIndex.Get(id) + if err != nil { + return nil, fmt.Errorf("could not resolve sandbox id %v: %v", id, err) + } + + sandbox, ok := s.sandboxes[fullID] + if !ok { + // This should never happen, internal state must be desynced + return nil, fmt.Errorf("cannot find sandbox %v in sandboxes map", fullID) + } + + return sandbox, nil +} + +// GetAllSandboxes returns all sandboxes in the state +func (s *InMemoryState) GetAllSandboxes() ([]*sandbox.Sandbox, error) { + s.lock.Lock() + defer s.lock.Unlock() + + sandboxes := make([]*sandbox.Sandbox, 0, len(s.sandboxes)) + for _, sb := range s.sandboxes { + sandboxes = append(sandboxes, sb) + } + + return sandboxes, nil +} + +// AddContainer adds a single container to a given sandbox in the state +func (s *InMemoryState) AddContainer(c *oci.Container) error { + s.lock.Lock() + defer s.lock.Unlock() + + if c == nil { + return fmt.Errorf("nil passed as container to AddContainer") + } + + sandbox, ok := s.sandboxes[c.Sandbox()] + if !ok { + return errNoSuchSandbox(c.Sandbox()) + } + + if ctr := sandbox.GetContainer(c.ID()); ctr != nil { + return fmt.Errorf("container with ID %v already exists in sandbox %v", c.ID(), c.Sandbox()) + } + + if sandbox.InfraContainer().ID() == c.ID() { + return fmt.Errorf("container is infra container of sandbox %s, refusing to add to containers list", c.Sandbox()) + } + + sandbox.AddContainer(c) + + return s.addContainerMappings(c, true) +} + +// Add container ID, Name and Sandbox mappings +func (s *InMemoryState) addContainerMappings(c *oci.Container, addToContainers bool) error { + if addToContainers && s.containers.Get(c.ID()) != nil { + return fmt.Errorf("container with ID %v already exists in containers store", c.ID()) + } + + // TODO: if not a pod infra container, check if it conflicts with existing sandbox ID? + // Does this matter? + + if addToContainers { + s.containers.Add(c.ID(), c) + } + if err := s.ctrNameIndex.Reserve(c.Name(), c.ID()); err != nil { + s.containers.Delete(c.ID()) + return fmt.Errorf("error registering container name: %v", err) + } + if err := s.ctrIDIndex.Add(c.ID()); err != nil { + s.containers.Delete(c.ID()) + s.ctrNameIndex.Release(c.ID()) + return fmt.Errorf("error registering container ID: %v", err) + } + + return nil +} + +// HasContainer checks if a container with the given ID exists in a given sandbox +func (s *InMemoryState) HasContainer(id, sandboxID string) bool { + s.lock.Lock() + defer s.lock.Unlock() + + sandbox, ok := s.sandboxes[sandboxID] + if !ok { + return false + } + + ctr := sandbox.GetContainer(id) + + return ctr != nil +} + +// DeleteContainer removes the container with given ID from the given sandbox +func (s *InMemoryState) DeleteContainer(id, sandboxID string) error { + s.lock.Lock() + defer s.lock.Unlock() + + sandbox, ok := s.sandboxes[sandboxID] + if !ok { + return errNoSuchSandbox(sandboxID) + } + + ctr := sandbox.GetContainer(id) + if ctr == nil { + return errNoSuchContainerInSandbox(id, sandboxID) + } + + sandbox.RemoveContainer(id) + + return s.deleteContainerMappings(ctr, true) +} + +// Deletes container from the ID and Name mappings and optionally from the global containers list +func (s *InMemoryState) deleteContainerMappings(ctr *oci.Container, deleteFromContainers bool) error { + if deleteFromContainers && s.containers.Get(ctr.ID()) == nil { + return fmt.Errorf("container ID %v does not exist in containers store", ctr.ID()) + } + + if deleteFromContainers { + s.containers.Delete(ctr.ID()) + } + s.ctrNameIndex.Release(ctr.Name()) + if err := s.ctrIDIndex.Delete(ctr.ID()); err != nil { + return fmt.Errorf("error unregistering container ID: %v", err) + } + + return nil +} + +// GetContainer returns the container with given ID in the given sandbox +func (s *InMemoryState) GetContainer(id, sandboxID string) (*oci.Container, error) { + s.lock.Lock() + defer s.lock.Unlock() + + return s.getContainerFromSandbox(id, sandboxID) +} + +// GetContainerSandbox returns the ID of a container's sandbox from the full container ID +// May not find the ID of pod infrastructure containers +func (s *InMemoryState) GetContainerSandbox(id string) (string, error) { + s.lock.Lock() + defer s.lock.Unlock() + + ctr := s.containers.Get(id) + if ctr == nil { + return "", errNoSuchContainer(id) + } + + return ctr.Sandbox(), nil +} + +// LookupContainerByName returns the full ID of a container given its full or partial name +func (s *InMemoryState) LookupContainerByName(name string) (*oci.Container, error) { + s.lock.Lock() + defer s.lock.Unlock() + + fullID, err := s.ctrNameIndex.Get(name) + if err != nil { + return nil, fmt.Errorf("cannot resolve container name %v: %v", name, err) + } + + return s.getContainer(fullID) +} + +// LookupContainerByID returns the full ID of a container given a full or partial ID +// If the given ID is not unique, an error is returned +func (s *InMemoryState) LookupContainerByID(id string) (*oci.Container, error) { + s.lock.Lock() + defer s.lock.Unlock() + + fullID, err := s.ctrIDIndex.Get(id) + if err != nil { + return nil, fmt.Errorf("cannot resolve container ID %v: %v", id, err) + } + + return s.getContainer(fullID) +} + +// GetAllContainers returns all containers in the state, regardless of which sandbox they belong to +// Pod Infra containers are not included +func (s *InMemoryState) GetAllContainers() ([]*oci.Container, error) { + return s.containers.List(), nil +} + +// Returns a single container from any sandbox based on full ID +// TODO: is it worth making this public as an alternative to GetContainer +func (s *InMemoryState) getContainer(id string) (*oci.Container, error) { + ctr := s.containers.Get(id) + if ctr == nil { + return nil, errNoSuchContainer(id) + } + + return s.getContainerFromSandbox(id, ctr.Sandbox()) +} + +// Returns a single container from a sandbox based on its full ID +// Internal implementation of GetContainer() but does not lock so it can be used in other functions +func (s *InMemoryState) getContainerFromSandbox(id, sandboxID string) (*oci.Container, error) { + sandbox, ok := s.sandboxes[sandboxID] + if !ok { + return nil, errNoSuchSandbox(sandboxID) + } + + ctr := sandbox.GetContainer(id) + if ctr == nil { + return nil, errNoSuchContainerInSandbox(id, sandboxID) + } + + return ctr, nil +} + +func errNoSuchSandbox(id string) error { + return fmt.Errorf("no sandbox with ID %s found", id) +} + +func errNoSuchContainer(id string) error { + return fmt.Errorf("no container with ID %s found", id) +} + +func errNoSuchContainerInSandbox(id string, sandboxID string) error { + return fmt.Errorf("no container with ID %s in sandbox %s", id, sandboxID) +} diff --git a/server/state/state_store.go b/server/state/state_store.go new file mode 100644 index 00000000..f96ce0de --- /dev/null +++ b/server/state/state_store.go @@ -0,0 +1,29 @@ +package state + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/sandbox" +) + +// Store stores the state of the CRI-O server, including active pods and containers +type Store interface { + AddSandbox(s *sandbox.Sandbox) error + HasSandbox(id string) bool + DeleteSandbox(id string) error + // These should modify the associated sandbox without prompting + AddContainer(c *oci.Container) error + HasContainer(id, sandboxID string) bool + DeleteContainer(id, sandboxID string) error + // These two require full, explicit ID + GetSandbox(id string) (*sandbox.Sandbox, error) + GetContainer(id, sandboxID string) (*oci.Container, error) + // Get ID of sandbox container belongs to + GetContainerSandbox(id string) (string, error) + // Following 4 should accept partial names as long as they are globally unique + LookupSandboxByName(name string) (*sandbox.Sandbox, error) + LookupSandboxByID(id string) (*sandbox.Sandbox, error) + LookupContainerByName(name string) (*oci.Container, error) + LookupContainerByID(id string) (*oci.Container, error) + GetAllSandboxes() ([]*sandbox.Sandbox, error) + GetAllContainers() ([]*oci.Container, error) +}