Merge pull request #549 from runcom/stability-fixes

Stability fixes
This commit is contained in:
Mrunal Patel 2017-06-01 10:10:14 -07:00 committed by GitHub
commit 36255b8663
5 changed files with 70 additions and 31 deletions

View file

@ -550,14 +550,17 @@ func (r *Runtime) UpdateStatus(c *Container) error {
defer c.opLock.Unlock()
out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput()
if err != nil {
if err := unix.Kill(c.state.Pid, 0); err == syscall.ESRCH {
// There are many code paths that could leave the underlying runtime in a
// bad state.
// On any error (for example, a container went away, or we rebooted and the
// containers went away) we do not error out, since that would stop
// kubernetes from recovering.
// We always populate the fields below so kube can restart/reschedule
// failing containers.
c.state.Status = ContainerStateStopped
c.state.Finished = time.Now()
c.state.ExitCode = 255
return nil
}
return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out)
}
if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil {
return fmt.Errorf("failed to decode container status for %s: %s", c.name, err)
}

View file

@ -345,11 +345,11 @@ func (r *runtimeService) RemovePodSandbox(idOrName string) error {
}
func (r *runtimeService) DeleteContainer(idOrName string) error {
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
if err == storage.ErrContainerUnknown {
if idOrName == "" {
return ErrInvalidContainerID
}
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
return err
}
err = r.storageImageServer.GetStore().DeleteContainer(container.ID)
@ -403,11 +403,11 @@ func (r *runtimeService) StartContainer(idOrName string) (string, error) {
}
func (r *runtimeService) StopContainer(idOrName string) error {
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
if err == storage.ErrContainerUnknown {
if idOrName == "" {
return ErrInvalidContainerID
}
container, err := r.storageImageServer.GetStore().Container(idOrName)
if err != nil {
return err
}
err = r.storageImageServer.GetStore().Unmount(container.ID)

View file

@ -47,6 +47,10 @@ func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen
return fmt.Errorf("Mount.HostPath is empty")
}
if _, err := os.Stat(src); err != nil && os.IsNotExist(err) {
os.MkdirAll(src, 0644)
}
options := []string{"rw"}
if mount.Readonly {
options = []string{"ro"}

View file

@ -5,7 +5,10 @@ import (
"syscall"
"github.com/Sirupsen/logrus"
"github.com/containers/storage"
"github.com/docker/docker/pkg/mount"
"github.com/kubernetes-incubator/cri-o/oci"
pkgstorage "github.com/kubernetes-incubator/cri-o/pkg/storage"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/net/context"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
@ -56,10 +59,11 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
continue
}
if err := s.storageRuntimeServer.StopContainer(c.ID()); err != nil {
return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
if err := s.storageRuntimeServer.StopContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
// assume container already unmounted
logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
}
if err := s.storageRuntimeServer.DeleteContainer(c.ID()); err != nil {
if err := s.storageRuntimeServer.DeleteContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.id, err)
}
@ -76,10 +80,12 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
// unmount the shm for the pod
if sb.shmPath != "/dev/shm" {
if mounted, err := mount.Mounted(sb.shmPath); err == nil && mounted {
if err := syscall.Unmount(sb.shmPath, syscall.MNT_DETACH); err != nil {
return nil, err
}
}
}
if err := sb.netNsRemove(); err != nil {
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
@ -89,9 +95,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
// Remove the files related to the sandbox
if err := s.storageRuntimeServer.StopContainer(sb.id); err != nil {
return nil, fmt.Errorf("failed to delete sandbox container in pod sandbox %s: %v", sb.id, err)
logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.id, err)
}
if err := s.storageRuntimeServer.RemovePodSandbox(sb.id); err != nil {
if err := s.storageRuntimeServer.RemovePodSandbox(sb.id); err != nil && err != pkgstorage.ErrInvalidSandboxID {
return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.id, err)
}

View file

@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"syscall"
@ -66,6 +67,10 @@ func (s *Server) runContainer(container *oci.Container, cgroupParent string) err
return nil
}
// conflictRE matches the text `already reserved for pod "<id>"` and captures
// <id> (one or more lowercase alphanumeric characters) as its only submatch.
var conflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`)
// RunPodSandbox creates and runs a pod-level sandbox.
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
s.updateLock.RLock()
@ -84,8 +89,30 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
id, name, err := s.generatePodIDandName(kubeName, namespace, attempt)
if err != nil {
if strings.Contains(err.Error(), "already reserved for pod") {
matches := conflictRE.FindStringSubmatch(err.Error())
if len(matches) != 2 {
return nil, err
}
dupID := matches[1]
if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil {
return nil, err
}
id, name, err = s.generatePodIDandName(kubeName, namespace, attempt)
if err != nil {
return nil, err
}
} else {
return nil, err
}
}
defer func() {
if err != nil {
s.releasePodName(name)
}
}()
_, containerName, err := s.generateContainerIDandName(name, "infra", attempt)
if err != nil {
return nil, err
@ -93,7 +120,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
defer func() {
if err != nil {
s.releasePodName(name)
s.releaseContainerName(containerName)
}
}()
@ -228,12 +255,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err
}
defer func() {
if err != nil {
s.releaseContainerName(containerName)
}
}()
if err = s.ctrIDIndex.Add(id); err != nil {
return nil, err
}
@ -298,9 +319,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
defer func() {
if err != nil {
s.removeSandbox(id)
if err2 := s.podIDIndex.Delete(id); err2 != nil {
logrus.Warnf("couldn't delete pod id %s from idIndex", id)
}
}
}()
@ -309,6 +327,14 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err
}
defer func() {
if err != nil {
if err := s.podIDIndex.Delete(id); err != nil {
logrus.Warnf("couldn't delete pod id %s from idIndex", id)
}
}
}()
for k, v := range annotations {
g.AddAnnotation(k, v)
}