diff --git a/oci/container.go b/oci/container.go index 9ac624e4..8ef5b716 100644 --- a/oci/container.go +++ b/oci/container.go @@ -1,11 +1,15 @@ package oci import ( + "encoding/json" "fmt" + "os" + "path/filepath" "sync" "time" "github.com/containernetworking/cni/pkg/ns" + "github.com/docker/docker/pkg/ioutils" specs "github.com/opencontainers/runtime-spec/specs-go" "k8s.io/apimachinery/pkg/fields" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" @@ -13,104 +17,91 @@ import ( // Container represents a runtime container. type Container struct { - id string - name string - bundlePath string - logPath string - labels fields.Set - annotations fields.Set - image *pb.ImageSpec - sandbox string + Id string + Name string + BundlePath string + LogPath string + Labels fields.Set + Annotations fields.Set + Image *pb.ImageSpec + Sandbox string netns ns.NetNS - terminal bool - privileged bool - state *ContainerState - metadata *pb.ContainerMetadata + Terminal bool + Privileged bool + State *ContainerState + Metadata *pb.ContainerMetadata opLock sync.Mutex + StateDir string } // ContainerState represents the status of a container. type ContainerState struct { specs.State - Created time.Time `json:"created"` - Started time.Time `json:"started"` - Finished time.Time `json:"finished"` - ExitCode int32 `json:"exitCode"` + Created time.Time `json:"created,omitempty"` + Started time.Time `json:"started,omitempty"` + Finished time.Time `json:"finished,omitempty"` + ExitCode int32 `json:"exitCode,omitempty"` } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool, stateDir string) (*Container, error) { c := &Container{ - id: id, - name: name, - bundlePath: bundlePath, - logPath: logPath, - labels: labels, - sandbox: sandbox, + Id: id, + Name: name, + BundlePath: bundlePath, + LogPath: logPath, + Labels: labels, + Sandbox: sandbox, netns: netns, - terminal: terminal, - privileged: privileged, - metadata: metadata, - annotations: annotations, - image: image, + Terminal: terminal, + Privileged: privileged, + Metadata: metadata, + Annotations: annotations, + Image: image, + StateDir: stateDir, } return c, nil } -// Name returns the name of the container. -func (c *Container) Name() string { - return c.name +func (c *Container) toDisk() error { + pth := filepath.Join(c.StateDir, "state") + jsonSource, err := ioutils.NewAtomicFileWriter(pth, 0644) + if err != nil { + return err + } + defer jsonSource.Close() + enc := json.NewEncoder(jsonSource) + return enc.Encode(c) +} + +func (c *Container) FromDisk() error { + pth := filepath.Join(c.StateDir, "state") + + jsonSource, err := os.Open(pth) + if err != nil { + return err + } + defer jsonSource.Close() + + dec := json.NewDecoder(jsonSource) + + return dec.Decode(c) } // ID returns the id of the container. func (c *Container) ID() string { - return c.id -} - -// BundlePath returns the bundlePath of the container. -func (c *Container) BundlePath() string { - return c.bundlePath -} - -// LogPath returns the log path of the container. -func (c *Container) LogPath() string { - return c.logPath -} - -// Labels returns the labels of the container. -func (c *Container) Labels() map[string]string { - return c.labels -} - -// Annotations returns the annotations of the container. -func (c *Container) Annotations() map[string]string { - return c.annotations -} - -// Image returns the image of the container. -func (c *Container) Image() *pb.ImageSpec { - return c.image -} - -// Sandbox returns the sandbox name of the container. -func (c *Container) Sandbox() string { - return c.sandbox + return c.Id } // NetNsPath returns the path to the network namespace of the container. func (c *Container) NetNsPath() (string, error) { - if c.state == nil { + if c.State == nil { return "", fmt.Errorf("container state is not populated") } if c.netns == nil { - return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil + return fmt.Sprintf("/proc/%d/ns/net", c.State.Pid), nil } return c.netns.Path(), nil } - -// Metadata returns the metadata of the container. -func (c *Container) Metadata() *pb.ContainerMetadata { - return c.metadata -} diff --git a/oci/history.go b/oci/history.go index 2ced41d6..d76b9867 100644 --- a/oci/history.go +++ b/oci/history.go @@ -16,7 +16,7 @@ func (history *History) Len() int { func (history *History) Less(i, j int) bool { containers := *history // FIXME: state access should be serialized - return containers[j].state.Created.Before(containers[i].state.Created) + return containers[j].State.Created.Before(containers[i].State.Created) } // Swap switches containers i and j positions in the history. diff --git a/oci/oci.go b/oci/oci.go index 1f927102..051e6a41 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -71,7 +71,7 @@ func (r *Runtime) Name() string { // Depending if the container is privileged, it will return // the privileged runtime or not. func (r *Runtime) Path(c *Container) string { - if c.privileged && r.privilegedPath != "" { + if c.Privileged && r.privilegedPath != "" { return r.privilegedPath } @@ -110,12 +110,12 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { if r.cgroupManager == "systemd" { args = append(args, "-s") } - args = append(args, "-c", c.name) + args = append(args, "-c", c.Name) args = append(args, "-r", r.Path(c)) - args = append(args, "-b", c.bundlePath) - args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile")) - args = append(args, "-l", c.logPath) - if c.terminal { + args = append(args, "-b", c.BundlePath) + args = append(args, "-p", filepath.Join(c.BundlePath, "pidfile")) + args = append(args, "-l", c.LogPath) + if c.Terminal { args = append(args, "-t") } logrus.WithFields(logrus.Fields{ @@ -123,7 +123,7 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { }).Debugf("running conmon: %s", r.conmonPath) cmd := exec.Command(r.conmonPath, args...) - cmd.Dir = c.bundlePath + cmd.Dir = c.BundlePath cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } @@ -146,8 +146,8 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { // Move conmon to specified cgroup if cgroupParent != "" { if r.cgroupManager == "systemd" { - logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("ocid", c.name)) - if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("ocid", c.name)); err != nil { + logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("ocid", c.Name)) + if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("ocid", c.Name)); err != nil { logrus.Warnf("Failed to add conmon to sandbox cgroup: %v", err) } } @@ -177,6 +177,10 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { case <-time.After(ContainerCreateTimeout): return fmt.Errorf("create container timeout") } + + c.State = &ContainerState{} + c.State.Created = time.Now() + return nil } @@ -188,10 +192,10 @@ func createUnitName(prefix string, name string) string { func (r *Runtime) StartContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "start", c.name); err != nil { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "start", c.Name); err != nil { return err } - c.state.Started = time.Now() + c.State.Started = time.Now() return nil } @@ -282,7 +286,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp } }() - logFile, err := ioutil.TempFile("", "ocid-log-"+c.name) + logFile, err := ioutil.TempFile("", "ocid-log-"+c.Name) if err != nil { return nil, ExecSyncError{ ExitCode: -1, @@ -296,11 +300,11 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp }() var args []string - args = append(args, "-c", c.name) + args = append(args, "-c", c.Name) args = append(args, "-r", r.Path(c)) args = append(args, "-p", pidFile.Name()) args = append(args, "-e") - if c.terminal { + if c.Terminal { args = append(args, "-t") } args = append(args, "-l", logPath) @@ -439,20 +443,23 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp func (r *Runtime) StopContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.name, "TERM"); err != nil { + if _, err := utils.ExecCmd(r.Path(c), "kill", c.Name, "TERM"); err != nil { + if strings.Contains(err.Error(), "does not exist") { + return nil + } return err } i := 0 for { if i == 1000 { - err := unix.Kill(c.state.Pid, syscall.SIGKILL) + err := unix.Kill(c.State.Pid, syscall.SIGKILL) if err != nil && err != syscall.ESRCH { return fmt.Errorf("failed to kill process: %v", err) } break } // Check if the process is still around - err := unix.Kill(c.state.Pid, 0) + err := unix.Kill(c.State.Pid, 0) if err == syscall.ESRCH { break } @@ -460,6 +467,8 @@ func (r *Runtime) StopContainer(c *Container) error { i++ } + c.State.Finished = time.Now() + return nil } @@ -467,30 +476,34 @@ func (r *Runtime) StopContainer(c *Container) error { func (r *Runtime) DeleteContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "delete", c.name) + _, err := utils.ExecCmd(r.Path(c), "delete", c.Name) + if err != nil && !strings.Contains(err.Error(), "does not exist") { + return err + } + return nil } // UpdateStatus refreshes the status of the container. func (r *Runtime) UpdateStatus(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput() + out, err := exec.Command(r.Path(c), "state", c.Name).CombinedOutput() if err != nil { - return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out) + return fmt.Errorf("error getting container state for %s: %s: %q", c.Name, err, out) } - if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil { - return fmt.Errorf("failed to decode container status for %s: %s", c.name, err) + if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.State); err != nil { + return fmt.Errorf("failed to decode container status for %s: %s", c.Name, err) } - if c.state.Status == ContainerStateStopped { - exitFilePath := filepath.Join(c.bundlePath, "exit") + if c.State.Status == ContainerStateStopped { + exitFilePath := filepath.Join(c.BundlePath, "exit") fi, err := os.Stat(exitFilePath) if err != nil { logrus.Warnf("failed to find container exit file: %v", err) - c.state.ExitCode = -1 + c.State.ExitCode = -1 } else { st := fi.Sys().(*syscall.Stat_t) - c.state.Finished = time.Unix(st.Ctim.Sec, st.Ctim.Nsec) + c.State.Finished = time.Unix(st.Ctim.Sec, st.Ctim.Nsec) statusCodeStr, err := ioutil.ReadFile(exitFilePath) if err != nil { @@ -500,10 +513,12 @@ func (r *Runtime) UpdateStatus(c *Container) error { if err != nil { return fmt.Errorf("status code conversion failed: %v", err) } - c.state.ExitCode = int32(statusCode) + c.State.ExitCode = int32(statusCode) } } + c.toDisk() + return nil } @@ -511,7 +526,7 @@ func (r *Runtime) UpdateStatus(c *Container) error { func (r *Runtime) ContainerStatus(c *Container) *ContainerState { c.opLock.Lock() defer c.opLock.Unlock() - return c.state + return c.State } // newPipe creates a unix socket pair for communication diff --git a/server/container_create.go b/server/container_create.go index 38fc3c6f..8c898922 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -480,7 +480,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, specgen.AddAnnotation("ocid/name", containerName) specgen.AddAnnotation("ocid/sandbox_id", sb.id) - specgen.AddAnnotation("ocid/sandbox_name", sb.infraContainer.Name()) + specgen.AddAnnotation("ocid/sandbox_name", sb.infraContainer.Name) specgen.AddAnnotation("ocid/container_type", containerTypeContainer) specgen.AddAnnotation("ocid/log_path", logPath) specgen.AddAnnotation("ocid/tty", fmt.Sprintf("%v", containerConfig.Tty)) @@ -594,7 +594,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged, containerInfo.Dir) if err != nil { return nil, err } diff --git a/server/container_exec.go b/server/container_exec.go index 8ca1103d..df391cb0 100644 --- a/server/container_exec.go +++ b/server/container_exec.go @@ -45,7 +45,7 @@ func (ss streamService) Exec(containerID string, cmd []string, stdin io.Reader, if tty { args = append(args, "-t") } - args = append(args, c.Name()) + args = append(args, c.Name) args = append(args, cmd...) execCmd := exec.Command(ss.runtimeServer.runtime.Path(c), args...) var cmdErr error diff --git a/server/container_list.go b/server/container_list.go index 9e372a5f..aa0fabb2 100644 --- a/server/container_list.go +++ b/server/container_list.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" "golang.org/x/net/context" @@ -43,7 +45,7 @@ func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersReque c := s.state.containers.Get(id) if c != nil { if filter.PodSandboxId != "" { - if c.Sandbox() == filter.PodSandboxId { + if c.Sandbox == filter.PodSandboxId { ctrList = []*oci.Container{c} } else { ctrList = []*oci.Container{} @@ -67,23 +69,27 @@ func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersReque for _, ctr := range ctrList { if err := s.runtime.UpdateStatus(ctr); err != nil { - return nil, err + logrus.Warnf("error updating status for ctr %s: %v", ctr.ID, err) } - podSandboxID := ctr.Sandbox() + podSandboxID := ctr.Sandbox cState := s.runtime.ContainerStatus(ctr) - created := cState.Created.UnixNano() + // TODO: we must be saving container creation somewhere on disk for when we restore + created := time.Time{}.UnixNano() rState := pb.ContainerState_CONTAINER_UNKNOWN + if cState != nil { + created = cState.Created.UnixNano() + } cID := ctr.ID() c := &pb.Container{ Id: cID, PodSandboxId: podSandboxID, CreatedAt: created, - Labels: ctr.Labels(), - Metadata: ctr.Metadata(), - Annotations: ctr.Annotations(), - Image: ctr.Image(), + Labels: ctr.Labels, + Metadata: ctr.Metadata, + Annotations: ctr.Annotations, + Image: ctr.Image, } switch cState.Status { diff --git a/server/container_remove.go b/server/container_remove.go index 028ffed8..0c4acaa5 100644 --- a/server/container_remove.go +++ b/server/container_remove.go @@ -19,7 +19,7 @@ func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerReq } if err := s.runtime.UpdateStatus(c); err != nil { - return nil, fmt.Errorf("failed to update container state: %v", err) + logrus.Debugf("failed to update container %s state: %v", c.ID, err) } cState := s.runtime.ContainerStatus(c) @@ -43,12 +43,16 @@ func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerReq return nil, fmt.Errorf("failed to delete storage for container %s: %v", c.ID(), err) } - s.releaseContainerName(c.Name()) + s.releaseContainerName(c.Name) if err := s.ctrIDIndex.Delete(c.ID()); err != nil { return nil, err } + if err := s.runtime.UpdateStatus(c); err != nil { + logrus.Debugf("failed to update container %s state: %v", c.ID, err) + } + resp := &pb.RemoveContainerResponse{} logrus.Debugf("RemoveContainerResponse: %+v", resp) return resp, nil diff --git a/server/container_status.go b/server/container_status.go index 82a51877..4184bf8c 100644 --- a/server/container_status.go +++ b/server/container_status.go @@ -2,6 +2,7 @@ package server import ( "encoding/json" + "time" "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" @@ -19,17 +20,17 @@ func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusReq } if err = s.runtime.UpdateStatus(c); err != nil { - return nil, err + logrus.Debugf("failed to get container status for %s: %v", c.ID, err) } containerID := c.ID() - image := c.Image() + image := c.Image resp := &pb.ContainerStatusResponse{ Status: &pb.ContainerStatus{ Id: containerID, - Metadata: c.Metadata(), - Labels: c.Labels(), - Annotations: c.Annotations(), + Metadata: c.Metadata, + Labels: c.Labels, + Annotations: c.Annotations, Image: image, }, } @@ -50,26 +51,31 @@ func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusReq cState := s.runtime.ContainerStatus(c) rStatus := pb.ContainerState_CONTAINER_UNKNOWN - switch cState.Status { - case oci.ContainerStateCreated: - rStatus = pb.ContainerState_CONTAINER_CREATED - created := cState.Created.UnixNano() - resp.Status.CreatedAt = created - case oci.ContainerStateRunning: - rStatus = pb.ContainerState_CONTAINER_RUNNING - created := cState.Created.UnixNano() - resp.Status.CreatedAt = created - started := cState.Started.UnixNano() - resp.Status.StartedAt = started - case oci.ContainerStateStopped: - rStatus = pb.ContainerState_CONTAINER_EXITED - created := cState.Created.UnixNano() - resp.Status.CreatedAt = created - started := cState.Started.UnixNano() - resp.Status.StartedAt = started - finished := cState.Finished.UnixNano() - resp.Status.FinishedAt = finished - resp.Status.ExitCode = cState.ExitCode + if cState != nil { + switch cState.Status { + case oci.ContainerStateCreated: + rStatus = pb.ContainerState_CONTAINER_CREATED + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + case oci.ContainerStateRunning: + rStatus = pb.ContainerState_CONTAINER_RUNNING + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + started := cState.Started.UnixNano() + resp.Status.StartedAt = started + case oci.ContainerStateStopped: + rStatus = pb.ContainerState_CONTAINER_EXITED + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + started := cState.Started.UnixNano() + resp.Status.StartedAt = started + finished := cState.Finished.UnixNano() + resp.Status.FinishedAt = finished + resp.Status.ExitCode = cState.ExitCode + default: + resp.Status.CreatedAt = time.Time{}.UnixNano() + resp.Status.StartedAt = time.Time{}.UnixNano() + } } resp.Status.State = rStatus diff --git a/server/container_stop.go b/server/container_stop.go index 58865edf..992ad2f4 100644 --- a/server/container_stop.go +++ b/server/container_stop.go @@ -18,14 +18,17 @@ func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest } if err := s.runtime.UpdateStatus(c); err != nil { - return nil, err + logrus.Debugf("failed to update container %s state: %v", c.ID, err) } cStatus := s.runtime.ContainerStatus(c) - if cStatus.Status != oci.ContainerStateStopped { + if cStatus != nil && cStatus.Status != oci.ContainerStateStopped { if err := s.runtime.StopContainer(c); err != nil { return nil, fmt.Errorf("failed to stop container %s: %v", c.ID(), err) } } + if err := s.runtime.UpdateStatus(c); err != nil { + logrus.Debugf("failed to update container %s state: %v", c.ID, err) + } resp := &pb.StopContainerResponse{} logrus.Debugf("StopContainerResponse: %+v", resp) diff --git a/server/sandbox.go b/server/sandbox.go index 0f57f557..6f592ade 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -159,7 +159,7 @@ var ( ) func (s *sandbox) addContainer(c *oci.Container) { - s.containers.Add(c.Name(), c) + s.containers.Add(c.Name, c) } func (s *sandbox) getContainer(name string) *oci.Container { @@ -167,7 +167,7 @@ func (s *sandbox) getContainer(name string) *oci.Container { } func (s *sandbox) removeContainer(c *oci.Container) { - s.containers.Delete(c.Name()) + s.containers.Delete(c.Name) } func (s *sandbox) netNs() ns.NetNS { diff --git a/server/sandbox_list.go b/server/sandbox_list.go index 9e4b3562..2139115e 100644 --- a/server/sandbox_list.go +++ b/server/sandbox_list.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" "golang.org/x/net/context" @@ -60,13 +62,17 @@ func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxReque continue } if err := s.runtime.UpdateStatus(podInfraContainer); err != nil { - return nil, err + logrus.Warnf("error updating status for pod %s: %v", sb.id, err) } cState := s.runtime.ContainerStatus(podInfraContainer) - created := cState.Created.UnixNano() + // TODO: we must be saving pod creation somewhere on disk for when we restore + created := time.Time{}.UnixNano() rStatus := pb.PodSandboxState_SANDBOX_NOTREADY - if cState.Status == oci.ContainerStateRunning { - rStatus = pb.PodSandboxState_SANDBOX_READY + if cState != nil { + created = cState.Created.UnixNano() + if cState.Status == oci.ContainerStateRunning { + rStatus = pb.PodSandboxState_SANDBOX_READY + } } pod := &pb.PodSandbox{ diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index 9628c6b5..132fa1d6 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -33,19 +33,19 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR // Delete all the containers in the sandbox for _, c := range containers { if err := s.runtime.UpdateStatus(c); err != nil { - return nil, fmt.Errorf("failed to update container state: %v", err) + logrus.Debugf("failed to update container %s state: %v", c.ID, err) } cState := s.runtime.ContainerStatus(c) - if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { + if cState != nil && (cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning) { if err := s.runtime.StopContainer(c); err != nil { // Assume container is already stopped - logrus.Warnf("failed to stop container %s: %v", c.Name(), err) + logrus.Warnf("failed to stop container %s: %v", c.Name, err) } } if err := s.runtime.DeleteContainer(c); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + logrus.Warnf("failed to delete container %s in pod sandbox %s: %v", c.Name, sb.id, err) } if c.ID() == podInfraContainer.ID() { @@ -53,16 +53,16 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR } if err := s.storageRuntimeServer.StopContainer(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name, sb.id, err) } if err := s.storageRuntimeServer.DeleteContainer(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name, sb.id, err) } - s.releaseContainerName(c.Name()) + s.releaseContainerName(c.Name) s.removeContainer(c) if err := s.ctrIDIndex.Delete(c.ID()); err != nil { - return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s from index: %v", c.Name(), sb.id, err) + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s from index: %v", c.Name, sb.id, err) } } @@ -91,7 +91,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.id, err) } - s.releaseContainerName(podInfraContainer.Name()) + s.releaseContainerName(podInfraContainer.Name) if err := s.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil { return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.id, err) } @@ -106,17 +106,3 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR logrus.Debugf("RemovePodSandboxResponse %+v", resp) return resp, nil } - -// RemoveAllPodSandboxes removes all pod sandboxes -func (s *Server) RemoveAllPodSandboxes() { - logrus.Debugf("RemoveAllPodSandboxes") - s.Update() - for _, sb := range s.state.sandboxes { - pod := &pb.RemovePodSandboxRequest{ - PodSandboxId: sb.id, - } - if _, err := s.RemovePodSandbox(nil, pod); err != nil { - logrus.Warnf("could not RemovePodSandbox %s: %v", sb.id, err) - } - } -} diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 88797343..29a77fdb 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "regexp" "strconv" "syscall" @@ -17,6 +18,10 @@ import ( pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) +var ( + podConflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`) +) + // privilegedSandbox returns true if the sandbox configuration // requires additional host privileges for the sandbox. func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { @@ -81,8 +86,21 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest id, name, err := s.generatePodIDandName(kubeName, namespace, attempt) if err != nil { - return nil, err + matches := podConflictRE.FindStringSubmatch(err.Error()) + if len(matches) != 2 { + return nil, err + } + podID := matches[1] + _, err = s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: podID}) + if err != nil { + return nil, err + } + id, name, err = s.generatePodIDandName(kubeName, namespace, attempt) + if err != nil { + return nil, err + } } + _, containerName, err := s.generateContainerIDandName(name, "infra", attempt) if err != nil { return nil, err @@ -398,7 +416,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged, podContainer.Dir) if err != nil { return nil, err } diff --git a/server/sandbox_status.go b/server/sandbox_status.go index 15d35260..688807a3 100644 --- a/server/sandbox_status.go +++ b/server/sandbox_status.go @@ -1,6 +1,8 @@ package server import ( + "time" + "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/oci" "golang.org/x/net/context" @@ -17,11 +19,14 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR podInfraContainer := sb.infraContainer if err = s.runtime.UpdateStatus(podInfraContainer); err != nil { - return nil, err + logrus.Debugf("failed to get sandbox status for %s: %v", sb.id, err) } cState := s.runtime.ContainerStatus(podInfraContainer) - created := cState.Created.UnixNano() + created := time.Time{}.UnixNano() + if cState != nil { + created = cState.Created.UnixNano() + } netNsPath, err := podInfraContainer.NetNsPath() if err != nil { @@ -34,7 +39,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR } rStatus := pb.PodSandboxState_SANDBOX_NOTREADY - if cState.Status == oci.ContainerStateRunning { + if cState != nil && cState.Status == oci.ContainerStateRunning { rStatus = pb.PodSandboxState_SANDBOX_READY } diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index a6f8d32b..261eb0f1 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -27,7 +27,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque if _, err := os.Stat(netnsPath); err == nil { if err2 := s.netPlugin.TearDownPod(netnsPath, sb.namespace, sb.kubeName, sb.id); err2 != nil { return nil, fmt.Errorf("failed to destroy network for container %s in sandbox %s: %v", - podInfraContainer.Name(), sb.id, err2) + podInfraContainer.Name, sb.id, err2) } } else if !os.IsNotExist(err) { // it's ok for netnsPath to *not* exist return nil, fmt.Errorf("failed to stat netns path for container %s in sandbox %s before tearing down the network: %v", @@ -44,12 +44,12 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque for _, c := range containers { if err := s.runtime.UpdateStatus(c); err != nil { - return nil, err + logrus.Debugf("failed to update status for container %s: %v", c.ID, err) } cStatus := s.runtime.ContainerStatus(c) - if cStatus.Status != oci.ContainerStateStopped { + if cStatus != nil && cStatus.Status != oci.ContainerStateStopped { if err := s.runtime.StopContainer(c); err != nil { - return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.id, err) + logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name, sb.id, err) } } } @@ -58,3 +58,15 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque logrus.Debugf("StopPodSandboxResponse: %+v", resp) return resp, nil } + +func (s *Server) stopAllPodSandboxes() { + logrus.Debugf("stopAllPodSandboxes") + for _, sb := range s.state.sandboxes { + pod := &pb.StopPodSandboxRequest{ + PodSandboxId: sb.id, + } + if _, err := s.StopPodSandbox(nil, pod); err != nil { + logrus.Warnf("could not StopPodSandbox %s: %v", sb.id, err) + } + } +} diff --git a/server/server.go b/server/server.go index ff265197..ff1d2cfc 100644 --- a/server/server.go +++ b/server/server.go @@ -133,12 +133,18 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged) + cDir, err := s.store.GetContainerDirectory(id) if err != nil { return err } + + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged, cDir) + if err != nil { + return err + } + ctr.FromDisk() if err = s.runtime.UpdateStatus(ctr); err != nil { - return fmt.Errorf("error updating status for container %s: %v", ctr.ID(), err) + logrus.Debugf("error updating status for container %s: %v", ctr.ID(), err) } s.addContainer(ctr) return s.ctrIDIndex.Add(id) @@ -253,12 +259,18 @@ func (s *Server) loadSandbox(id string) error { } }() - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged) + cDir, err := s.store.GetContainerDirectory(id) if err != nil { return err } + + scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged, cDir) + if err != nil { + return err + } + scontainer.FromDisk() if err = s.runtime.UpdateStatus(scontainer); err != nil { - return fmt.Errorf("error updating status for pod sandbox infra container %s: %v", scontainer.ID(), err) + logrus.Debugf("error updating status for pod sandbox infra container %s: %v", scontainer.ID(), err) } if err = label.ReserveLabel(processLabel); err != nil { return err @@ -366,7 +378,7 @@ func (s *Server) update() error { logrus.Warnf("bad state when getting container removed %+v", removedPodContainer) continue } - s.releaseContainerName(c.Name()) + s.releaseContainerName(c.Name) s.removeContainer(c) if err = s.ctrIDIndex.Delete(c.ID()); err != nil { return err @@ -387,7 +399,7 @@ func (s *Server) update() error { continue } podInfraContainer := sb.infraContainer - s.releaseContainerName(podInfraContainer.Name()) + s.releaseContainerName(podInfraContainer.Name) s.removeContainer(podInfraContainer) if err = s.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil { return err @@ -462,7 +474,7 @@ func (s *Server) cleanupSandboxesOnShutdown() { _, err := os.Stat(shutdownFile) if err == nil || !os.IsNotExist(err) { logrus.Debugf("shutting down all sandboxes, on shutdown") - s.RemoveAllPodSandboxes() + s.stopAllPodSandboxes() err = os.Remove(shutdownFile) if err != nil { logrus.Warnf("Failed to remove %q", shutdownFile) @@ -606,7 +618,7 @@ func (s *Server) removeSandbox(id string) { func (s *Server) addContainer(c *oci.Container) { s.stateLock.Lock() - sandbox := s.state.sandboxes[c.Sandbox()] + sandbox := s.state.sandboxes[c.Sandbox] // TODO(runcom): handle !ok above!!! otherwise it panics! sandbox.addContainer(c) s.state.containers.Add(c.ID(), c) @@ -622,7 +634,7 @@ func (s *Server) getContainer(id string) *oci.Container { func (s *Server) removeContainer(c *oci.Container) { s.stateLock.Lock() - sandbox := s.state.sandboxes[c.Sandbox()] + sandbox := s.state.sandboxes[c.Sandbox] sandbox.removeContainer(c) s.state.containers.Delete(c.ID()) s.stateLock.Unlock() diff --git a/utils/utils.go b/utils/utils.go index 340e1ba9..1635e4b4 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -23,7 +23,7 @@ func ExecCmd(name string, args ...string) (string, error) { err := cmd.Run() if err != nil { - return "", fmt.Errorf("`%v %v` failed: %v (%v)", name, strings.Join(args, " "), stderr.String(), err) + return "", fmt.Errorf("`%v %v` failed: %v %v (%v)", name, strings.Join(args, " "), stderr.String(), stdout.String(), err) } return stdout.String(), nil