Merge pull request #505 from runcom/restore-2

Fix restore
This commit is contained in:
Mrunal Patel 2017-05-19 08:39:28 -07:00 committed by GitHub
commit fb83c7e619
15 changed files with 200 additions and 34 deletions

View file

@ -44,7 +44,7 @@ RUN mkdir -p /usr/src/criu \
&& rm -rf /usr/src/criu
# Install runc
ENV RUNC_COMMIT v1.0.0-rc3
ENV RUNC_COMMIT 639454475cb9c8b861cc599f8bcd5c8c790ae402
RUN set -x \
&& export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \

View file

@ -1,7 +1,10 @@
package oci
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
@ -15,7 +18,6 @@ import (
type Container struct {
id string
name string
bundlePath string
logPath string
labels fields.Set
annotations fields.Set
@ -27,19 +29,25 @@ type Container struct {
state *ContainerState
metadata *pb.ContainerMetadata
opLock sync.Mutex
// this is the /var/run/storage/... directory, erased on reboot
bundlePath string
// this is the /var/lib/storage/... directory
dir string
}
// ContainerState represents the status of a container.
type ContainerState struct {
specs.State
Created time.Time `json:"created"`
Started time.Time `json:"started"`
Finished time.Time `json:"finished"`
ExitCode int32 `json:"exitCode"`
Started time.Time `json:"started,omitempty"`
Finished time.Time `json:"finished,omitempty"`
ExitCode int32 `json:"exitCode,omitempty"`
}
// NewContainer creates a container object.
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool) (*Container, error) {
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool, dir string, created time.Time) (*Container, error) {
state := &ContainerState{}
state.Created = created
c := &Container{
id: id,
name: name,
@ -53,10 +61,34 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net
metadata: metadata,
annotations: annotations,
image: image,
dir: dir,
state: state,
}
return c, nil
}
// FromDisk loads the container's persisted state from the file returned by
// StatePath and JSON-decodes it into the container's in-memory state.
// It returns the error from opening the file or from decoding it.
func (c *Container) FromDisk() error {
	stateFile, err := os.Open(c.StatePath())
	if err != nil {
		return err
	}
	defer stateFile.Close()

	return json.NewDecoder(stateFile).Decode(c.state)
}
// StatePath returns the path of the container's state.json file, which lives
// directly inside the container's directory (c.dir).
func (c *Container) StatePath() string {
	const stateFileName = "state.json"
	return filepath.Join(c.dir, stateFileName)
}
// CreatedAt reports the creation time recorded in the container's state.
func (c *Container) CreatedAt() time.Time {
	created := c.state.Created
	return created
}
// Name returns the name of the container.
func (c *Container) Name() string {
return c.name

View file

@ -501,6 +501,8 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
}
}
c.state.Finished = time.Now()
return nil
}
@ -508,7 +510,8 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
func (r *Runtime) DeleteContainer(c *Container) error {
c.opLock.Lock()
defer c.opLock.Unlock()
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "delete", c.name)
_, err := utils.ExecCmd(r.Path(c), "delete", "--force", c.name)
return err
}
// UpdateStatus refreshes the status of the container.
@ -517,6 +520,12 @@ func (r *Runtime) UpdateStatus(c *Container) error {
defer c.opLock.Unlock()
out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput()
if err != nil {
if err := unix.Kill(c.state.Pid, 0); err == syscall.ESRCH {
c.state.Status = ContainerStateStopped
c.state.Finished = time.Now()
c.state.ExitCode = 255
return nil
}
return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out)
}
if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil {

View file

@ -10,6 +10,7 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid"
@ -309,6 +310,8 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
return nil, err
}
s.containerStateToDisk(container)
resp := &pb.CreateContainerResponse{
ContainerId: containerID,
}
@ -540,6 +543,9 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
specgen.AddAnnotation("crio/tty", fmt.Sprintf("%v", containerConfig.Tty))
specgen.AddAnnotation("crio/image", image)
created := time.Now()
specgen.AddAnnotation("crio/created", created.Format(time.RFC3339Nano))
metadataJSON, err := json.Marshal(metadata)
if err != nil {
return nil, err
@ -648,7 +654,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
return nil, err
}
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged)
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged, containerInfo.Dir, created)
if err != nil {
return nil, err
}

View file

@ -20,6 +20,8 @@ func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerReque
return nil, fmt.Errorf("failed to start container %s: %v", c.ID(), err)
}
s.containerStateToDisk(c)
resp := &pb.StartContainerResponse{}
logrus.Debugf("StartContainerResponse %+v", resp)
return resp, nil

View file

@ -21,6 +21,7 @@ func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusReq
if err = s.runtime.UpdateStatus(c); err != nil {
return nil, err
}
s.containerStateToDisk(c)
containerID := c.ID()
image := c.Image()

View file

@ -27,6 +27,8 @@ func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest
}
}
s.containerStateToDisk(c)
resp := &pb.StopContainerResponse{}
logrus.Debugf("StopContainerResponse: %+v", resp)
return resp, nil

View file

@ -62,6 +62,7 @@ func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxReque
if err := s.runtime.UpdateStatus(podInfraContainer); err != nil {
return nil, err
}
cState := s.runtime.ContainerStatus(podInfraContainer)
created := cState.Created.UnixNano()
rStatus := pb.PodSandboxState_SANDBOX_NOTREADY

View file

@ -106,17 +106,3 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
logrus.Debugf("RemovePodSandboxResponse %+v", resp)
return resp, nil
}
// RemoveAllPodSandboxes removes every pod sandbox in s.state.sandboxes after
// calling s.Update(). A failure to remove one sandbox is logged as a warning
// and does not stop the loop.
func (s *Server) RemoveAllPodSandboxes() {
	logrus.Debugf("RemoveAllPodSandboxes")
	s.Update()
	for _, sb := range s.state.sandboxes {
		req := &pb.RemovePodSandboxRequest{
			PodSandboxId: sb.id,
		}
		// There is no caller-supplied context here; a nil Context must never
		// be passed, so use context.TODO() as the placeholder.
		if _, err := s.RemovePodSandbox(context.TODO(), req); err != nil {
			logrus.Warnf("could not RemovePodSandbox %s: %v", sb.id, err)
		}
	}
}

View file

@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/containers/storage"
@ -269,6 +270,9 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
g.AddAnnotation("crio/hostname", hostname)
g.AddAnnotation("crio/kube_name", kubeName)
created := time.Now()
g.AddAnnotation("crio/created", created.Format(time.RFC3339Nano))
sb := &sandbox{
id: id,
namespace: namespace,
@ -403,7 +407,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err)
}
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged)
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged, podContainer.Dir, created)
if err != nil {
return nil, err
}
@ -421,6 +425,8 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err
}
s.containerStateToDisk(container)
resp = &pb.RunPodSandboxResponse{PodSandboxId: id}
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
return resp, nil

View file

@ -19,9 +19,9 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR
if err = s.runtime.UpdateStatus(podInfraContainer); err != nil {
return nil, err
}
s.containerStateToDisk(podInfraContainer)
cState := s.runtime.ContainerStatus(podInfraContainer)
created := cState.Created.UnixNano()
netNsPath, err := podInfraContainer.NetNsPath()
if err != nil {
@ -42,7 +42,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusR
resp := &pb.PodSandboxStatusResponse{
Status: &pb.PodSandboxStatus{
Id: sandboxID,
CreatedAt: created,
CreatedAt: podInfraContainer.CreatedAt().UnixNano(),
Linux: &pb.LinuxPodSandboxStatus{
Namespaces: &pb.Namespace{
Network: netNsPath,

View file

@ -52,9 +52,23 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
}
}
s.containerStateToDisk(c)
}
resp := &pb.StopPodSandboxResponse{}
logrus.Debugf("StopPodSandboxResponse: %+v", resp)
return resp, nil
}
// StopAllPodSandboxes stops every pod sandbox in s.state.sandboxes.
// A failure to stop one sandbox is logged as a warning and does not stop
// the loop.
func (s *Server) StopAllPodSandboxes() {
	logrus.Debugf("StopAllPodSandboxes")
	for _, sb := range s.state.sandboxes {
		req := &pb.StopPodSandboxRequest{
			PodSandboxId: sb.id,
		}
		// There is no caller-supplied context here; a nil Context must never
		// be passed, so use context.TODO() as the placeholder.
		if _, err := s.StopPodSandbox(context.TODO(), req); err != nil {
			logrus.Warnf("could not StopPodSandbox %s: %v", sb.id, err)
		}
	}
}

View file

@ -7,10 +7,12 @@ import (
"os"
"path/filepath"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/containers/image/types"
sstorage "github.com/containers/storage"
"github.com/docker/docker/pkg/ioutils"
"github.com/docker/docker/pkg/registrar"
"github.com/docker/docker/pkg/truncindex"
"github.com/kubernetes-incubator/cri-o/oci"
@ -120,6 +122,11 @@ func (s *Server) loadContainer(id string) error {
return err
}
containerDir, err := s.store.ContainerDirectory(id)
if err != nil {
return err
}
var img *pb.ImageSpec
image, ok := m.Annotations["crio/image"]
if ok {
@ -133,17 +140,47 @@ func (s *Server) loadContainer(id string) error {
return err
}
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["crio/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged)
created, err := time.Parse(time.RFC3339Nano, m.Annotations["crio/created"])
if err != nil {
return err
}
if err = s.runtime.UpdateStatus(ctr); err != nil {
return fmt.Errorf("error updating status for container %s: %v", ctr.ID(), err)
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["crio/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged, containerDir, created)
if err != nil {
return err
}
s.containerStateFromDisk(ctr)
s.addContainer(ctr)
return s.ctrIDIndex.Add(id)
}
// containerStateFromDisk restores c's state from its on-disk state file and
// then asks the runtime to refresh that state. Only a failure to read the
// state from disk is returned.
func (s *Server) containerStateFromDisk(c *oci.Container) error {
	err := c.FromDisk()
	if err != nil {
		return err
	}
	// Best effort: the refresh may fail, in which case the state just loaded
	// from disk is kept as-is, so the error is deliberately discarded.
	_ = s.runtime.UpdateStatus(c)
	return nil
}
// containerStateToDisk refreshes c's status from the runtime and writes the
// resulting state as JSON to c.StatePath() through an atomic file writer.
//
// It returns an error if the writer cannot be created, if encoding fails, or
// if closing the writer fails. Closing is what moves the temporary file to
// its final path, so its error must not be dropped.
func (s *Server) containerStateToDisk(c *oci.Container) (err error) {
	// ignore errors, this is a best effort to have up-to-date info about
	// a given container before its state gets stored
	_ = s.runtime.UpdateStatus(c)

	stateWriter, err := ioutils.NewAtomicFileWriter(c.StatePath(), 0644)
	if err != nil {
		return err
	}
	defer func() {
		// Surface the Close error unless an earlier error is already being
		// returned.
		if closeErr := stateWriter.Close(); err == nil {
			err = closeErr
		}
	}()

	return json.NewEncoder(stateWriter).Encode(s.runtime.ContainerStatus(c))
}
func configNetNsPath(spec rspec.Spec) (string, error) {
for _, ns := range spec.Linux.Namespaces {
if ns.Type != rspec.NetworkNamespace {
@ -244,6 +281,11 @@ func (s *Server) loadSandbox(id string) error {
return err
}
sandboxDir, err := s.store.ContainerDirectory(id)
if err != nil {
return err
}
cname, err := s.reserveContainerName(m.Annotations["crio/container_id"], m.Annotations["crio/container_name"])
if err != nil {
return err
@ -254,13 +296,18 @@ func (s *Server) loadSandbox(id string) error {
}
}()
scontainer, err := oci.NewContainer(m.Annotations["crio/container_id"], cname, sandboxPath, m.Annotations["crio/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged)
created, err := time.Parse(time.RFC3339Nano, m.Annotations["crio/created"])
if err != nil {
return err
}
if err = s.runtime.UpdateStatus(scontainer); err != nil {
return fmt.Errorf("error updating status for pod sandbox infra container %s: %v", scontainer.ID(), err)
scontainer, err := oci.NewContainer(m.Annotations["crio/container_id"], cname, sandboxPath, m.Annotations["crio/log_path"], sb.netNs(), labels, annotations, nil, nil, id, false, privileged, sandboxDir, created)
if err != nil {
return err
}
s.containerStateFromDisk(scontainer)
if err = label.ReserveLabel(processLabel); err != nil {
return err
}
@ -463,7 +510,7 @@ func (s *Server) cleanupSandboxesOnShutdown() {
_, err := os.Stat(shutdownFile)
if err == nil || !os.IsNotExist(err) {
logrus.Debugf("shutting down all sandboxes, on shutdown")
s.RemoveAllPodSandboxes()
s.StopAllPodSandboxes()
err = os.Remove(shutdownFile)
if err != nil {
logrus.Warnf("Failed to remove %q", shutdownFile)
@ -474,6 +521,10 @@ func (s *Server) cleanupSandboxesOnShutdown() {
// Shutdown attempts to shut down the server's storage cleanly
func (s *Server) Shutdown() error {
// TODO: reconsider doing this on a clean shutdown. We arguably want
// containers to be left running when ocid is down, for whatever reason.
// NOTE: this is triggered not only on system halt but also on a normal
// ocid.service restart.
s.cleanupSandboxesOnShutdown()
_, err := s.store.Shutdown(false)
return err

View file

@ -61,7 +61,7 @@ function teardown() {
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" != "" ]]
[[ "${output}" =~ "${pod_id}" ]]
[[ "${output}" =~ "${ctr_id}" ]]
run crioctl ctr list --id "$ctr_id"
echo "$output"
@ -77,3 +77,59 @@ function teardown() {
cleanup_pods
stop_crio
}
# Verifies that crio can restore pods and containers after the runtime's own
# state for them has been deleted while crio was stopped: the pod must be
# reported as SANDBOX_NOTREADY and the container as CONTAINER_EXITED with
# exit code 255.
@test "crio restore with bad state" {
start_crio
# Create a pod and check that it is ready.
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl pod status --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" =~ "SANDBOX_READY" ]]
# Create (but do not start) a container in that pod.
run crioctl ctr create --config "$TESTDATA"/container_config.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run crioctl ctr status --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" =~ "CONTAINER_CREATED" ]]
stop_crio
# simulate reboot with runc state going away
for i in $("$RUNTIME" list -q | xargs); do "$RUNTIME" delete -f $i; done
start_crio
# After the restart the pod must still be listed, but no longer ready.
run crioctl pod list
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" != "" ]]
[[ "${output}" =~ "${pod_id}" ]]
run crioctl pod status --id "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" =~ "SANDBOX_NOTREADY" ]]
# The container must still be listed and be reported as exited with code 255.
run crioctl ctr list
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" != "" ]]
[[ "${output}" =~ "${ctr_id}" ]]
run crioctl ctr status --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
[[ "${output}" =~ "CONTAINER_EXITED" ]]
[[ "${output}" =~ "Exit Code: 255" ]]
cleanup_ctrs
cleanup_pods
stop_crio
}

View file

@ -23,7 +23,7 @@ func ExecCmd(name string, args ...string) (string, error) {
err := cmd.Run()
if err != nil {
return "", fmt.Errorf("`%v %v` failed: %v (%v)", name, strings.Join(args, " "), stderr.String(), err)
return "", fmt.Errorf("`%v %v` failed: %v %v (%v)", name, strings.Join(args, " "), stderr.String(), stdout.String(), err)
}
return stdout.String(), nil