Merge pull request #1014 from runcom/oci-kill-all-fix
oci: fixes to properly handle container stop action
This commit is contained in:
commit
eafb7f7105
9 changed files with 54 additions and 31 deletions
|
@ -6,6 +6,7 @@ import (
|
|||
"github.com/kubernetes-incubator/cri-o/libkpod"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/urfave/cli"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -53,7 +54,7 @@ func rmCmd(c *cli.Context) error {
|
|||
force := c.Bool("force")
|
||||
|
||||
for _, container := range c.Args() {
|
||||
id, err2 := server.Remove(container, force)
|
||||
id, err2 := server.Remove(context.Background(), container, force)
|
||||
if err2 != nil {
|
||||
if err == nil {
|
||||
err = err2
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"github.com/kubernetes-incubator/cri-o/libkpod"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/urfave/cli"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -61,7 +62,7 @@ func stopCmd(c *cli.Context) error {
|
|||
}
|
||||
var lastError error
|
||||
for _, container := range c.Args() {
|
||||
cid, err := server.ContainerStop(container, stopTimeout)
|
||||
cid, err := server.ContainerStop(context.Background(), container, stopTimeout)
|
||||
if err != nil {
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
|
|
|
@ -6,10 +6,11 @@ import (
|
|||
|
||||
"github.com/kubernetes-incubator/cri-o/oci"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// Remove removes a container
|
||||
func (c *ContainerServer) Remove(container string, force bool) (string, error) {
|
||||
func (c *ContainerServer) Remove(ctx context.Context, container string, force bool) (string, error) {
|
||||
ctr, err := c.LookupContainer(container)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -22,7 +23,7 @@ func (c *ContainerServer) Remove(container string, force bool) (string, error) {
|
|||
return "", errors.Errorf("cannot remove paused container %s", ctrID)
|
||||
case oci.ContainerStateCreated, oci.ContainerStateRunning:
|
||||
if force {
|
||||
_, err = c.ContainerStop(container, -1)
|
||||
_, err = c.ContainerStop(ctx, container, 10)
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "unable to stop container %s", ctrID)
|
||||
}
|
||||
|
|
|
@ -3,10 +3,11 @@ package libkpod
|
|||
import (
|
||||
"github.com/kubernetes-incubator/cri-o/oci"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// ContainerStop stops a running container with a grace period (i.e., timeout).
|
||||
func (c *ContainerServer) ContainerStop(container string, timeout int64) (string, error) {
|
||||
func (c *ContainerServer) ContainerStop(ctx context.Context, container string, timeout int64) (string, error) {
|
||||
ctr, err := c.LookupContainer(container)
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "failed to find container %s", container)
|
||||
|
@ -20,7 +21,7 @@ func (c *ContainerServer) ContainerStop(container string, timeout int64) (string
|
|||
return "", errors.Errorf("cannot stop paused container %s", ctrID)
|
||||
default:
|
||||
if cStatus.Status != oci.ContainerStateStopped {
|
||||
if err := c.runtime.StopContainer(ctr, timeout); err != nil {
|
||||
if err := c.runtime.StopContainer(ctx, ctr, timeout); err != nil {
|
||||
return "", errors.Wrapf(err, "failed to stop container %s", ctrID)
|
||||
}
|
||||
if err := c.storageRuntimeServer.StopContainer(ctrID); err != nil {
|
||||
|
|
61
oci/oci.go
61
oci/oci.go
|
@ -17,6 +17,7 @@ import (
|
|||
"github.com/kubernetes-incubator/cri-o/utils"
|
||||
rspec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/context"
|
||||
"golang.org/x/sys/unix"
|
||||
kwait "k8s.io/apimachinery/pkg/util/wait"
|
||||
)
|
||||
|
@ -39,6 +40,10 @@ const (
|
|||
SystemdCgroupsManager = "systemd"
|
||||
// ContainerExitsDir is the location of container exit dirs
|
||||
ContainerExitsDir = "/var/run/crio/exits"
|
||||
|
||||
// killContainerTimeout is the timeout that we wait for the container to
|
||||
// be SIGKILLed.
|
||||
killContainerTimeout = 2 * time.Minute
|
||||
)
|
||||
|
||||
// New creates a new Runtime with options provided
|
||||
|
@ -542,25 +547,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp
|
|||
}, nil
|
||||
}
|
||||
|
||||
// StopContainer stops a container. Timeout is given in seconds.
|
||||
func (r *Runtime) StopContainer(c *Container, timeout int64) error {
|
||||
c.opLock.Lock()
|
||||
defer c.opLock.Unlock()
|
||||
|
||||
// Check if the process is around before sending a signal
|
||||
err := unix.Kill(c.state.Pid, 0)
|
||||
if err == unix.ESRCH {
|
||||
c.state.Finished = time.Now()
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, c.GetStopSignal()); err != nil {
|
||||
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
|
||||
}
|
||||
if timeout == -1 {
|
||||
// default 10 seconds delay
|
||||
timeout = 10
|
||||
}
|
||||
func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration) error {
|
||||
done := make(chan struct{})
|
||||
// we could potentially re-use "done" channel to exit the loop on timeout
|
||||
// but we use another channel "chControl" so that we never incur in the
|
||||
|
@ -588,7 +575,10 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
|
|||
select {
|
||||
case <-done:
|
||||
return nil
|
||||
case <-time.After(time.Duration(timeout) * time.Second):
|
||||
case <-ctx.Done():
|
||||
close(chControl)
|
||||
return ctx.Err()
|
||||
case <-time.After(timeout):
|
||||
close(chControl)
|
||||
err := unix.Kill(c.state.Pid, unix.SIGKILL)
|
||||
if err != nil && err != unix.ESRCH {
|
||||
|
@ -597,10 +587,39 @@ func (r *Runtime) StopContainer(c *Container, timeout int64) error {
|
|||
}
|
||||
|
||||
c.state.Finished = time.Now()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
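// StopContainer stops a container. Timeout is given in seconds: when it is > 0 the container's stop signal is sent first and the container is waited on for that long.
// Otherwise, or if that wait returns an error, all of the container's processes are sent KILL and the container is waited on for killContainerTimeout.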
|
||||
func (r *Runtime) StopContainer(ctx context.Context, c *Container, timeout int64) error {
|
||||
c.opLock.Lock()
|
||||
defer c.opLock.Unlock()
|
||||
|
||||
// Check if the process is around before sending a signal
|
||||
err := unix.Kill(c.state.Pid, 0)
|
||||
if err == unix.ESRCH {
|
||||
c.state.Finished = time.Now()
|
||||
return nil
|
||||
}
|
||||
|
||||
if timeout > 0 {
|
||||
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.id, c.GetStopSignal()); err != nil {
|
||||
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
|
||||
}
|
||||
err = waitContainerStop(ctx, c, time.Duration(timeout)*time.Second)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
logrus.Warnf("Stop container %q timed out: %v", c.ID(), err)
|
||||
}
|
||||
|
||||
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, "KILL"); err != nil {
|
||||
return fmt.Errorf("failed to stop container %s, %v", c.id, err)
|
||||
}
|
||||
|
||||
return waitContainerStop(ctx, c, killContainerTimeout)
|
||||
}
|
||||
|
||||
// DeleteContainer deletes a container.
|
||||
func (r *Runtime) DeleteContainer(c *Container) error {
|
||||
c.opLock.Lock()
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
// RemoveContainer removes the container. If the container is running, the container
|
||||
// should be force removed.
|
||||
func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) {
|
||||
_, err := s.ContainerServer.Remove(req.ContainerId, true)
|
||||
_, err := s.ContainerServer.Remove(ctx, req.ContainerId, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
|
||||
// StopContainer stops a running container with a grace period (i.e., timeout).
|
||||
func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) {
|
||||
_, err := s.ContainerServer.ContainerStop(req.ContainerId, req.Timeout)
|
||||
_, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
|
|||
if !sb.Stopped() {
|
||||
cState := s.Runtime().ContainerStatus(c)
|
||||
if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning {
|
||||
if err := s.Runtime().StopContainer(c, -1); err != nil {
|
||||
if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
|
||||
// Assume container is already stopped
|
||||
logrus.Warnf("failed to stop container %s: %v", c.Name(), err)
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
|
|||
for _, c := range containers {
|
||||
cStatus := s.Runtime().ContainerStatus(c)
|
||||
if cStatus.Status != oci.ContainerStateStopped {
|
||||
if err := s.Runtime().StopContainer(c, -1); err != nil {
|
||||
if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
|
||||
return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
|
||||
}
|
||||
if c.ID() == podInfraContainer.ID() {
|
||||
|
|
Loading…
Reference in a new issue