From eae1b7d6bd680eda47527bd9e68b10c7af96b9e8 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Sep 2017 17:35:20 +0200 Subject: [PATCH 1/2] oci: Delete container resources upon creation failure When cri-o assumes the container creation failed, we need to let the runtime know that we're bailing out so that it cancels all ongoing operations. In container creation timeout situations, for example, failing to explicitly ask the runtime to delete the container can lead to large resource leaks as kubelet re-creates a failing container, while the runtime finishes creating the previous one(s). Signed-off-by: Samuel Ortiz --- oci/oci.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/oci/oci.go b/oci/oci.go index e9babb7f..b34b00a2 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -151,7 +151,7 @@ func getOCIVersion(name string, args ...string) (string, error) { } // CreateContainer creates a container. -func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { +func (r *Runtime) CreateContainer(c *Container, cgroupParent string) (err error) { var stderrBuf bytes.Buffer parentPipe, childPipe, err := newPipe() childStartPipe, parentStartPipe, err := newPipe() @@ -248,6 +248,13 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) error { return err } + // We will delete all container resources if creation fails + defer func() { + if err != nil { + r.DeleteContainer(c) + } + }() + // Wait to get container pid from conmon type syncStruct struct { si *syncInfo From d27451029b29bab6af0a7442272ae18cb15a8ea1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 28 Sep 2017 15:01:46 +0200 Subject: [PATCH 2/2] oci: Increase the container creation timeout Under very heavy loads (e.g. 100 pods created at the same time), VM-based runtimes can take more than 10 seconds to create a pod. 
Signed-off-by: Samuel Ortiz --- oci/oci.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oci/oci.go b/oci/oci.go index b34b00a2..4c7a2230 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -31,7 +31,7 @@ const ( // ContainerStateStopped represents the stopped state of a container ContainerStateStopped = "stopped" // ContainerCreateTimeout represents the value of container creating timeout - ContainerCreateTimeout = 10 * time.Second + ContainerCreateTimeout = 240 * time.Second // CgroupfsCgroupsManager represents cgroupfs native cgroup manager CgroupfsCgroupsManager = "cgroupfs"