Merge pull request #1033 from runcom/fix-stop-1.0

oci: fixes to properly handle container stop action
This commit is contained in:
Mrunal Patel 2017-10-18 08:05:00 -07:00 committed by GitHub
commit 5f826acfaf
9 changed files with 54 additions and 31 deletions

View file

@ -6,6 +6,7 @@ import (
"github.com/kubernetes-incubator/cri-o/libkpod"
"github.com/pkg/errors"
"github.com/urfave/cli"
"golang.org/x/net/context"
)
var (
@ -53,7 +54,7 @@ func rmCmd(c *cli.Context) error {
force := c.Bool("force")
for _, container := range c.Args() {
id, err2 := server.Remove(container, force)
id, err2 := server.Remove(context.Background(), container, force)
if err2 != nil {
if err == nil {
err = err2

View file

@ -7,6 +7,7 @@ import (
"github.com/kubernetes-incubator/cri-o/libkpod"
"github.com/pkg/errors"
"github.com/urfave/cli"
"golang.org/x/net/context"
)
var (
@ -61,7 +62,7 @@ func stopCmd(c *cli.Context) error {
}
var lastError error
for _, container := range c.Args() {
cid, err := server.ContainerStop(container, stopTimeout)
cid, err := server.ContainerStop(context.Background(), container, stopTimeout)
if err != nil {
if lastError != nil {
fmt.Fprintln(os.Stderr, lastError)

View file

@ -6,10 +6,11 @@ import (
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/pkg/errors"
"golang.org/x/net/context"
)
// Remove removes a container
func (c *ContainerServer) Remove(container string, force bool) (string, error) {
func (c *ContainerServer) Remove(ctx context.Context, container string, force bool) (string, error) {
ctr, err := c.LookupContainer(container)
if err != nil {
return "", err
@ -22,7 +23,7 @@ func (c *ContainerServer) Remove(container string, force bool) (string, error) {
return "", errors.Errorf("cannot remove paused container %s", ctrID)
case oci.ContainerStateCreated, oci.ContainerStateRunning:
if force {
_, err = c.ContainerStop(container, -1)
_, err = c.ContainerStop(ctx, container, 10)
if err != nil {
return "", errors.Wrapf(err, "unable to stop container %s", ctrID)
}

View file

@ -3,10 +3,11 @@ package libkpod
import (
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/pkg/errors"
"golang.org/x/net/context"
)
// ContainerStop stops a running container with a grace period (i.e., timeout).
func (c *ContainerServer) ContainerStop(container string, timeout int64) (string, error) {
func (c *ContainerServer) ContainerStop(ctx context.Context, container string, timeout int64) (string, error) {
ctr, err := c.LookupContainer(container)
if err != nil {
return "", errors.Wrapf(err, "failed to find container %s", container)
@ -20,7 +21,7 @@ func (c *ContainerServer) ContainerStop(container string, timeout int64) (string
return "", errors.Errorf("cannot stop paused container %s", ctrID)
default:
if cStatus.Status != oci.ContainerStateStopped {
if err := c.runtime.StopContainer(ctr, timeout); err != nil {
if err := c.runtime.StopContainer(ctx, ctr, timeout); err != nil {
return "", errors.Wrapf(err, "failed to stop container %s", ctrID)
}
if err := c.storageRuntimeServer.StopContainer(ctrID); err != nil {

View file

@ -17,6 +17,7 @@ import (
"github.com/kubernetes-incubator/cri-o/utils"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
kwait "k8s.io/apimachinery/pkg/util/wait"
)
@ -39,6 +40,10 @@ const (
SystemdCgroupsManager = "systemd"
// ContainerExitsDir is the location of container exit dirs
ContainerExitsDir = "/var/run/crio/exits"
// killContainerTimeout is the timeout that we wait for the container to
// be SIGKILLed.
killContainerTimeout = 2 * time.Minute
)
// New creates a new Runtime with options provided
@ -542,25 +547,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp
}, nil
}
// StopContainer stops a container. Timeout is given in seconds.
func (r *Runtime) StopContainer(c *Container, timeout int64) error {
c.opLock.Lock()
defer c.opLock.Unlock()
// Check if the process is around before sending a signal
err := unix.Kill(c.state.Pid, 0)
if err == unix.ESRCH {
c.state.Finished = time.Now()
return nil
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, c.GetStopSignal()); err != nil {
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
}
if timeout == -1 {
// default 10 seconds delay
timeout = 10
}
func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration) error {
done := make(chan struct{})
// we could potentially re-use "done" channel to exit the loop on timeout
// but we use another channel "chControl" so that we won't never incur in the
@ -588,7 +575,10 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
select {
case <-done:
return nil
case <-time.After(time.Duration(timeout) * time.Second):
case <-ctx.Done():
close(chControl)
return ctx.Err()
case <-time.After(timeout):
close(chControl)
err := unix.Kill(c.state.Pid, unix.SIGKILL)
if err != nil && err != unix.ESRCH {
@ -597,10 +587,39 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
}
c.state.Finished = time.Now()
return nil
}
// StopContainer stops a container. Timeout is given in seconds.
func (r *Runtime) StopContainer(ctx context.Context, c *Container, timeout int64) error {
c.opLock.Lock()
defer c.opLock.Unlock()
// Check if the process is around before sending a signal
err := unix.Kill(c.state.Pid, 0)
if err == unix.ESRCH {
c.state.Finished = time.Now()
return nil
}
if timeout > 0 {
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.id, c.GetStopSignal()); err != nil {
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
}
err = waitContainerStop(ctx, c, time.Duration(timeout)*time.Second)
if err == nil {
return nil
}
logrus.Warnf("Stop container %q timed out: %v", c.ID(), err)
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, "KILL"); err != nil {
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
}
return waitContainerStop(ctx, c, killContainerTimeout)
}
// DeleteContainer deletes a container.
func (r *Runtime) DeleteContainer(c *Container) error {
c.opLock.Lock()

View file

@ -9,7 +9,7 @@ import (
// RemoveContainer removes the container. If the container is running, the container
// should be force removed.
func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) {
_, err := s.ContainerServer.Remove(req.ContainerId, true)
_, err := s.ContainerServer.Remove(ctx, req.ContainerId, true)
if err != nil {
return nil, err
}

View file

@ -8,7 +8,7 @@ import (
// StopContainer stops a running container with a grace period (i.e., timeout).
func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) {
_, err := s.ContainerServer.ContainerStop(req.ContainerId, req.Timeout)
_, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout)
if err != nil {
return nil, err
}

View file

@ -41,7 +41,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
if !sb.Stopped() {
cState := s.Runtime().ContainerStatus(c)
if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning {
if err := s.Runtime().StopContainer(c, -1); err != nil {
if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
// Assume container is already stopped
logrus.Warnf("failed to stop container %s: %v", c.Name(), err)
}

View file

@ -56,7 +56,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
for _, c := range containers {
cStatus := s.Runtime().ContainerStatus(c)
if cStatus.Status != oci.ContainerStateStopped {
if err := s.Runtime().StopContainer(c, -1); err != nil {
if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
}
if c.ID() == podInfraContainer.ID() {