Merge pull request #549 from runcom/stability-fixes

Stability fixes
This commit is contained in:
Mrunal Patel 2017-06-01 10:10:14 -07:00 committed by GitHub
commit 36255b8663
5 changed files with 70 additions and 31 deletions

View file

@ -550,14 +550,17 @@ func (r *Runtime) UpdateStatus(c *Container) error {
defer c.opLock.Unlock() defer c.opLock.Unlock()
out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput() out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput()
if err != nil { if err != nil {
if err := unix.Kill(c.state.Pid, 0); err == syscall.ESRCH { // there are many code paths that could lead to have a bad state in the
// underlying runtime.
// On any error (for example, the container went away, or we rebooted and
// the containers are gone) we do not return an error, because doing so
// would stop Kubernetes from recovering.
// We always populate the fields below so kube can restart/reschedule
// the failing containers.
c.state.Status = ContainerStateStopped c.state.Status = ContainerStateStopped
c.state.Finished = time.Now() c.state.Finished = time.Now()
c.state.ExitCode = 255 c.state.ExitCode = 255
return nil return nil
} }
return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out)
}
if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil { if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil {
return fmt.Errorf("failed to decode container status for %s: %s", c.name, err) return fmt.Errorf("failed to decode container status for %s: %s", c.name, err)
} }

View file

@ -345,11 +345,11 @@ func (r *runtimeService) RemovePodSandbox(idOrName string) error {
} }
func (r *runtimeService) DeleteContainer(idOrName string) error { func (r *runtimeService) DeleteContainer(idOrName string) error {
container, err := r.storageImageServer.GetStore().Container(idOrName) if idOrName == "" {
if err != nil {
if err == storage.ErrContainerUnknown {
return ErrInvalidContainerID return ErrInvalidContainerID
} }
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
return err return err
} }
err = r.storageImageServer.GetStore().DeleteContainer(container.ID) err = r.storageImageServer.GetStore().DeleteContainer(container.ID)
@ -403,11 +403,11 @@ func (r *runtimeService) StartContainer(idOrName string) (string, error) {
} }
func (r *runtimeService) StopContainer(idOrName string) error { func (r *runtimeService) StopContainer(idOrName string) error {
container, err := r.storageImageServer.GetStore().Container(idOrName) if idOrName == "" {
if err != nil {
if err == storage.ErrContainerUnknown {
return ErrInvalidContainerID return ErrInvalidContainerID
} }
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
return err return err
} }
err = r.storageImageServer.GetStore().Unmount(container.ID) err = r.storageImageServer.GetStore().Unmount(container.ID)

View file

@ -47,6 +47,10 @@ func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen
return fmt.Errorf("Mount.HostPath is empty") return fmt.Errorf("Mount.HostPath is empty")
} }
if _, err := os.Stat(src); err != nil && os.IsNotExist(err) {
os.MkdirAll(src, 0644)
}
options := []string{"rw"} options := []string{"rw"}
if mount.Readonly { if mount.Readonly {
options = []string{"ro"} options = []string{"ro"}

View file

@ -5,7 +5,10 @@ import (
"syscall" "syscall"
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/containers/storage"
"github.com/docker/docker/pkg/mount"
"github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/oci"
pkgstorage "github.com/kubernetes-incubator/cri-o/pkg/storage"
"github.com/opencontainers/selinux/go-selinux/label" "github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/net/context" "golang.org/x/net/context"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
@ -56,10 +59,11 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
continue continue
} }
if err := s.storageRuntimeServer.StopContainer(c.ID()); err != nil { if err := s.storageRuntimeServer.StopContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) // assume container already umounted
logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
} }
if err := s.storageRuntimeServer.DeleteContainer(c.ID()); err != nil { if err := s.storageRuntimeServer.DeleteContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err) return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
} }
@ -76,10 +80,12 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
// unmount the shm for the pod // unmount the shm for the pod
if sb.shmPath != "/dev/shm" { if sb.shmPath != "/dev/shm" {
if mounted, err := mount.Mounted(sb.shmPath); err == nil && mounted {
if err := syscall.Unmount(sb.shmPath, syscall.MNT_DETACH); err != nil { if err := syscall.Unmount(sb.shmPath, syscall.MNT_DETACH); err != nil {
return nil, err return nil, err
} }
} }
}
if err := sb.netNsRemove(); err != nil { if err := sb.netNsRemove(); err != nil {
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err) return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
@ -89,9 +95,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
// Remove the files related to the sandbox // Remove the files related to the sandbox
if err := s.storageRuntimeServer.StopContainer(sb.id); err != nil { if err := s.storageRuntimeServer.StopContainer(sb.id); err != nil {
return nil, fmt.Errorf("failed to delete sandbox container in pod sandbox %s: %v", sb.id, err) logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.id, err)
} }
if err := s.storageRuntimeServer.RemovePodSandbox(sb.id); err != nil { if err := s.storageRuntimeServer.RemovePodSandbox(sb.id); err != nil && err != pkgstorage.ErrInvalidSandboxID {
return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.id, err) return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.id, err)
} }

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"regexp"
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
@ -66,6 +67,10 @@ func (s *Server) runContainer(container *oci.Container, cgroupParent string) err
return nil return nil
} }
// conflictRE picks the conflicting pod's ID out of an "already reserved for
// pod" error message; the ID (lowercase alphanumerics) is capture group 1.
var conflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`)
// RunPodSandbox creates and runs a pod-level sandbox. // RunPodSandbox creates and runs a pod-level sandbox.
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
s.updateLock.RLock() s.updateLock.RLock()
@ -84,8 +89,30 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
id, name, err := s.generatePodIDandName(kubeName, namespace, attempt) id, name, err := s.generatePodIDandName(kubeName, namespace, attempt)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "already reserved for pod") {
matches := conflictRE.FindStringSubmatch(err.Error())
if len(matches) != 2 {
return nil, err return nil, err
} }
dupID := matches[1]
if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil {
return nil, err
}
id, name, err = s.generatePodIDandName(kubeName, namespace, attempt)
if err != nil {
return nil, err
}
} else {
return nil, err
}
}
defer func() {
if err != nil {
s.releasePodName(name)
}
}()
_, containerName, err := s.generateContainerIDandName(name, "infra", attempt) _, containerName, err := s.generateContainerIDandName(name, "infra", attempt)
if err != nil { if err != nil {
return nil, err return nil, err
@ -93,7 +120,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
defer func() { defer func() {
if err != nil { if err != nil {
s.releasePodName(name) s.releaseContainerName(containerName)
} }
}() }()
@ -228,12 +255,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err return nil, err
} }
defer func() {
if err != nil {
s.releaseContainerName(containerName)
}
}()
if err = s.ctrIDIndex.Add(id); err != nil { if err = s.ctrIDIndex.Add(id); err != nil {
return nil, err return nil, err
} }
@ -298,9 +319,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
defer func() { defer func() {
if err != nil { if err != nil {
s.removeSandbox(id) s.removeSandbox(id)
if err2 := s.podIDIndex.Delete(id); err2 != nil {
logrus.Warnf("couldn't delete pod id %s from idIndex", id)
}
} }
}() }()
@ -309,6 +327,14 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err return nil, err
} }
defer func() {
if err != nil {
if err := s.podIDIndex.Delete(id); err != nil {
logrus.Warnf("couldn't delete pod id %s from idIndex", id)
}
}
}()
for k, v := range annotations { for k, v := range annotations {
g.AddAnnotation(k, v) g.AddAnnotation(k, v)
} }