diff --git a/lib/stop.go b/lib/stop.go index 7dbbd066..6a8909de 100644 --- a/lib/stop.go +++ b/lib/stop.go @@ -24,6 +24,9 @@ func (c *ContainerServer) ContainerStop(ctx context.Context, container string, t if err := c.runtime.StopContainer(ctx, ctr, timeout); err != nil { return "", errors.Wrapf(err, "failed to stop container %s", ctrID) } + if err := c.runtime.WaitContainerStateStopped(ctx, ctr, timeout); err != nil { + return "", errors.Wrapf(err, "failed to get container 'stopped' status %s", ctrID) + } if err := c.storageRuntimeServer.StopContainer(ctrID); err != nil { return "", errors.Wrapf(err, "failed to unmount container %s", ctrID) } diff --git a/oci/oci.go b/oci/oci.go index 26b4ec26..73b9f885 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -604,6 +604,56 @@ func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration) return nil } +// WaitContainerStateStopped runs a loop polling UpdateStatus(), seeking for +// the container status to be updated to 'stopped'. Either it gets the expected +// status and returns nil, or it reaches the timeout and returns an error. +func (r *Runtime) WaitContainerStateStopped(ctx context.Context, c *Container, timeout int64) (err error) { + // No need to go further and spawn the go routine if the container + // is already in the expected status. + if r.ContainerStatus(c).Status == ContainerStateStopped { + return nil + } + + done := make(chan error) + chControl := make(chan struct{}) + go func() { + for { + select { + case <-chControl: + return + default: + // Check if the container is stopped + if err := r.UpdateStatus(c); err != nil { + done <- err + close(done) + return + } + if r.ContainerStatus(c).Status == ContainerStateStopped { + close(done) + return + } + time.Sleep(100 * time.Millisecond) + } + } + }() + select { + case err = <-done: + break + case <-ctx.Done(): + close(chControl) + return ctx.Err() + case <-time.After(time.Duration(timeout) * time.Second): + close(chControl) + return fmt.Errorf("failed to get container stopped status: %ds timeout reached", timeout) + } + + if err != nil { + return fmt.Errorf("failed to get container stopped status: %v", err) + } + + return nil +} + // StopContainer stops a container. Timeout is given in seconds. func (r *Runtime) StopContainer(ctx context.Context, c *Container, timeout int64) error { c.opLock.Lock() @@ -655,6 +705,7 @@ func (r *Runtime) SetStartFailed(c *Container, err error) { func (r *Runtime) UpdateStatus(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + out, err := exec.Command(r.Path(c), "state", c.id).Output() if err != nil { // there are many code paths that could lead to have a bad state in the diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index 85db32a5..4e2716bc 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -48,10 +48,14 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR if !sb.Stopped() { cState := s.Runtime().ContainerStatus(c) if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { - if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { + timeout := int64(10) + if err := s.Runtime().StopContainer(ctx, c, timeout); err != nil { // Assume container is already stopped logrus.Warnf("failed to stop container %s: %v", c.Name(), err) } + if err := s.Runtime().WaitContainerStateStopped(ctx, c, timeout); err != nil { + return nil, fmt.Errorf("failed to get container 'stopped' status %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } } } diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index 5bd24d15..42fef50a 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -56,9 +56,13 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque for _, c := range containers { cStatus := s.Runtime().ContainerStatus(c) if cStatus.Status != oci.ContainerStateStopped { - if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { + timeout := int64(10) + if err := s.Runtime().StopContainer(ctx, c, timeout); err != nil { return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) } + if err := s.Runtime().WaitContainerStateStopped(ctx, c, timeout); err != nil { + return nil, fmt.Errorf("failed to get container 'stopped' status %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } if c.ID() == podInfraContainer.ID() { continue }