From c6f5a290d86022d929c1db0638671f13fbc6ef13 Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Sun, 15 Oct 2017 22:05:41 +0200 Subject: [PATCH] oci: fixes to properly handle container stop action Signed-off-by: Antonio Murdaca --- cmd/kpod/rm.go | 3 +- cmd/kpod/stop.go | 3 +- libkpod/remove.go | 5 ++-- libkpod/stop.go | 5 ++-- oci/oci.go | 61 +++++++++++++++++++++++++------------- server/container_remove.go | 2 +- server/container_stop.go | 2 +- server/sandbox_remove.go | 2 +- server/sandbox_stop.go | 2 +- 9 files changed, 54 insertions(+), 31 deletions(-) diff --git a/cmd/kpod/rm.go b/cmd/kpod/rm.go index 69f68302..c40fa41c 100644 --- a/cmd/kpod/rm.go +++ b/cmd/kpod/rm.go @@ -6,6 +6,7 @@ import ( "github.com/kubernetes-incubator/cri-o/libkpod" "github.com/pkg/errors" "github.com/urfave/cli" + "golang.org/x/net/context" ) var ( @@ -53,7 +54,7 @@ func rmCmd(c *cli.Context) error { force := c.Bool("force") for _, container := range c.Args() { - id, err2 := server.Remove(container, force) + id, err2 := server.Remove(context.Background(), container, force) if err2 != nil { if err == nil { err = err2 diff --git a/cmd/kpod/stop.go b/cmd/kpod/stop.go index 06b26bb9..279f7b76 100644 --- a/cmd/kpod/stop.go +++ b/cmd/kpod/stop.go @@ -7,6 +7,7 @@ import ( "github.com/kubernetes-incubator/cri-o/libkpod" "github.com/pkg/errors" "github.com/urfave/cli" + "golang.org/x/net/context" ) var ( @@ -61,7 +62,7 @@ func stopCmd(c *cli.Context) error { } var lastError error for _, container := range c.Args() { - cid, err := server.ContainerStop(container, stopTimeout) + cid, err := server.ContainerStop(context.Background(), container, stopTimeout) if err != nil { if lastError != nil { fmt.Fprintln(os.Stderr, lastError) diff --git a/libkpod/remove.go b/libkpod/remove.go index a3aa6eea..5df9e8f7 100644 --- a/libkpod/remove.go +++ b/libkpod/remove.go @@ -6,10 +6,11 @@ import ( "github.com/kubernetes-incubator/cri-o/oci" "github.com/pkg/errors" + "golang.org/x/net/context" ) // Remove removes a container -func (c *ContainerServer) Remove(container string, force bool) (string, error) { +func (c *ContainerServer) Remove(ctx context.Context, container string, force bool) (string, error) { ctr, err := c.LookupContainer(container) if err != nil { return "", err @@ -22,7 +23,7 @@ func (c *ContainerServer) Remove(container string, force bool) (string, error) { return "", errors.Errorf("cannot remove paused container %s", ctrID) case oci.ContainerStateCreated, oci.ContainerStateRunning: if force { - _, err = c.ContainerStop(container, -1) + _, err = c.ContainerStop(ctx, container, 10) if err != nil { return "", errors.Wrapf(err, "unable to stop container %s", ctrID) } diff --git a/libkpod/stop.go b/libkpod/stop.go index 06712d45..86c9cbec 100644 --- a/libkpod/stop.go +++ b/libkpod/stop.go @@ -3,10 +3,11 @@ package libkpod import ( "github.com/kubernetes-incubator/cri-o/oci" "github.com/pkg/errors" + "golang.org/x/net/context" ) // ContainerStop stops a running container with a grace period (i.e., timeout). -func (c *ContainerServer) ContainerStop(container string, timeout int64) (string, error) { +func (c *ContainerServer) ContainerStop(ctx context.Context, container string, timeout int64) (string, error) { ctr, err := c.LookupContainer(container) if err != nil { return "", errors.Wrapf(err, "failed to find container %s", container) @@ -20,7 +21,7 @@ func (c *ContainerServer) ContainerStop(container string, timeout int64) (string return "", errors.Errorf("cannot stop paused container %s", ctrID) default: if cStatus.Status != oci.ContainerStateStopped { - if err := c.runtime.StopContainer(ctr, timeout); err != nil { + if err := c.runtime.StopContainer(ctx, ctr, timeout); err != nil { return "", errors.Wrapf(err, "failed to stop container %s", ctrID) } if err := c.storageRuntimeServer.StopContainer(ctrID); err != nil { diff --git a/oci/oci.go b/oci/oci.go index a5f14ea3..b5a433c3 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -17,6 +17,7 @@ import ( "github.com/kubernetes-incubator/cri-o/utils" rspec "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" + "golang.org/x/net/context" "golang.org/x/sys/unix" kwait "k8s.io/apimachinery/pkg/util/wait" ) @@ -39,6 +40,10 @@ const ( SystemdCgroupsManager = "systemd" // ContainerExitsDir is the location of container exit dirs ContainerExitsDir = "/var/run/crio/exits" + + // killContainerTimeout is the timeout that we wait for the container to + // be SIGKILLed. + killContainerTimeout = 2 * time.Minute ) // New creates a new Runtime with options provided @@ -542,25 +547,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp }, nil } -// StopContainer stops a container. Timeout is given in seconds. -func (r *Runtime) StopContainer(c *Container, timeout int64) error { - c.opLock.Lock() - defer c.opLock.Unlock() - - // Check if the process is around before sending a signal - err := unix.Kill(c.state.Pid, 0) - if err == unix.ESRCH { - c.state.Finished = time.Now() - return nil - } - - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, c.GetStopSignal()); err != nil { - return fmt.Errorf("failed to stop container %s, %v", c.id, err) - } - if timeout == -1 { - // default 10 seconds delay - timeout = 10 - } +func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration) error { done := make(chan struct{}) // we could potentially re-use "done" channel to exit the loop on timeout // but we use another channel "chControl" so that we won't never incur in the @@ -588,7 +575,10 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error { select { case <-done: return nil - case <-time.After(time.Duration(timeout) * time.Second): + case <-ctx.Done(): + close(chControl) + return ctx.Err() + case <-time.After(timeout): close(chControl) err := unix.Kill(c.state.Pid, unix.SIGKILL) if err != nil && err != unix.ESRCH { @@ -597,10 +587,39 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error { } c.state.Finished = time.Now() - return nil } +// StopContainer stops a container. Timeout is given in seconds. +func (r *Runtime) StopContainer(ctx context.Context, c *Container, timeout int64) error { + c.opLock.Lock() + defer c.opLock.Unlock() + + // Check if the process is around before sending a signal + err := unix.Kill(c.state.Pid, 0) + if err == unix.ESRCH { + c.state.Finished = time.Now() + return nil + } + + if timeout > 0 { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.id, c.GetStopSignal()); err != nil { + return fmt.Errorf("failed to stop container %s, %v", c.id, err) + } + err = waitContainerStop(ctx, c, time.Duration(timeout)*time.Second) + if err == nil { + return nil + } + logrus.Warnf("Stop container %q timed out: %v", c.ID(), err) + } + + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, "KILL"); err != nil { + return fmt.Errorf("failed to stop container %s, %v", c.id, err) + } + + return waitContainerStop(ctx, c, killContainerTimeout) +} + // DeleteContainer deletes a container. func (r *Runtime) DeleteContainer(c *Container) error { c.opLock.Lock() diff --git a/server/container_remove.go b/server/container_remove.go index cedfc602..87102372 100644 --- a/server/container_remove.go +++ b/server/container_remove.go @@ -9,7 +9,7 @@ import ( // RemoveContainer removes the container. If the container is running, the container // should be force removed. func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) { - _, err := s.ContainerServer.Remove(req.ContainerId, true) + _, err := s.ContainerServer.Remove(ctx, req.ContainerId, true) if err != nil { return nil, err } diff --git a/server/container_stop.go b/server/container_stop.go index c0093cfd..f74ed86e 100644 --- a/server/container_stop.go +++ b/server/container_stop.go @@ -8,7 +8,7 @@ import ( // StopContainer stops a running container with a grace period (i.e., timeout). func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) { - _, err := s.ContainerServer.ContainerStop(req.ContainerId, req.Timeout) + _, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout) if err != nil { return nil, err } diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index 856b8938..b0e07384 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -41,7 +41,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR if !sb.Stopped() { cState := s.Runtime().ContainerStatus(c) if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { - if err := s.Runtime().StopContainer(c, -1); err != nil { + if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { // Assume container is already stopped logrus.Warnf("failed to stop container %s: %v", c.Name(), err) } diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index 7db436d1..9d6a5aa3 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -56,7 +56,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque for _, c := range containers { cStatus := s.Runtime().ContainerStatus(c) if cStatus.Status != oci.ContainerStateStopped { - if err := s.Runtime().StopContainer(c, -1); err != nil { + if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } if c.ID() == podInfraContainer.ID() {