From cff96ecf5cee5776e50d0841470e949a104a6135 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 19 Jan 2018 09:15:20 -0800 Subject: [PATCH 1/4] server/container_create: Simplify Kube/OCI process merge Handling both the nil ociConfig check and kubeCommands length check on the same line lets us drop one level of nesting. And that initial zero-length check makes the internal kubeCommands nil check redundant, so we can drop that too. Also adjust the buildOCIProcessArgs argument from a *v1.Image to *v1.ImageConfig, because we don't need anything else from Image here. This moves the nil Image check from inside buildOCIProcessArgs to the calling function, but I think that's worth the tighter scoping. Signed-off-by: W. Trevor King --- server/container_create.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/server/container_create.go b/server/container_create.go index a4652cf3..366cb566 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -376,7 +376,7 @@ func addDevices(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specge } // buildOCIProcessArgs build an OCI compatible process arguments slice. -func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, imageOCIConfig *v1.Image) ([]string, error) { +func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, ociConfig *v1.ImageConfig) ([]string, error) { //# Start the nginx container using the default command, but use custom //arguments (arg1 .. argN) for that command. //kubectl run nginx --image=nginx -- ... @@ -388,15 +388,10 @@ func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, imageOCIConfig kubeArgs := containerKubeConfig.Args // merge image config and kube config - // same as docker does today... - if imageOCIConfig != nil { - if len(kubeCommands) == 0 { - if len(kubeArgs) == 0 { - kubeArgs = imageOCIConfig.Config.Cmd - } - if kubeCommands == nil { - kubeCommands = imageOCIConfig.Config.Entrypoint - } + if ociConfig != nil && len(kubeCommands) == 0 { + kubeCommands = ociConfig.Entrypoint + if len(kubeArgs) == 0 { + kubeArgs = ociConfig.Cmd } } @@ -1179,9 +1174,12 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - processArgs, err := buildOCIProcessArgs(containerConfig, containerImageConfig) - if err != nil { - return nil, err + processArgs := []string{} + if containerImageConfig != nil { + processArgs, err := buildOCIProcessArgs(containerConfig, &containerImageConfig.Config) + if err != nil { + return nil, err + } } specgen.SetProcessArgs(processArgs) From c93ca853f319083d9ae8c200d8a077aa754242b3 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 19 Jan 2018 10:28:15 -0800 Subject: [PATCH 2/4] server/container_create: Simplify command/args join The old code was needlessly splitting either kubeCommands or kubeArgs into [0] and [1:]. The new code doesn't bother with that. From [1,2], all we need to do is append kubeArgs to kubeCommands, and we can do that in one line. [1]: https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact [2]: https://github.com/opencontainers/image-spec/blob/v1.0.1/conversion.md#verbatim-fields Signed-off-by: W. Trevor King --- server/container_create.go | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/server/container_create.go b/server/container_create.go index 366cb566..01820d2d 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -399,18 +399,7 @@ func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, ociConfig *v1. return nil, fmt.Errorf("no command specified") } - // create entrypoint and args - var entrypoint string - var args []string - if len(kubeCommands) != 0 { - entrypoint = kubeCommands[0] - args = append(kubeCommands[1:], kubeArgs...) - } else { - entrypoint = kubeArgs[0] - args = kubeArgs[1:] - } - - processArgs := append([]string{entrypoint}, args...) + processArgs := append(kubeCommands, kubeArgs...) logrus.Debugf("OCI process args %v", processArgs) From 375ddf4b3a7f7f1895201483ffcae2106401d0a5 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 19 Jan 2018 10:05:12 -0800 Subject: [PATCH 3/4] server/container_create: Allow for nil Process OCI runtime callers (like CRI-O) are allowed to leave process unset [1] for containers that they do not intend to 'start'. When we don't have any process.args, we *must* leave process unset (because process.args is required [2]). My personal preference would have been to have both process and process.args optional [3], which would have allowed for these settings to be decoupled, but that's not where the spec ended up. When we have no args and are clearing Process, we need to ensure that we don't re-create an args-less structure later on by populating process.user or similar. This commit collects later process-creating calls (e.g. setupContainerUser, which populates process.user) into the "we have some args" branch. This commit leaves earlier process-creating calls (e.g. SetProcessTerminal) where they were. Anything they do inside Process will be clobbered later if we nil it, but that's fine. [1]: https://github.com/opencontainers/runtime-spec/blame/v1.0.1/config.md#L145 [2]: https://github.com/opencontainers/runtime-spec/blame/v1.0.1/config.md#L157 [3]: https://github.com/opencontainers/runtime-spec/pull/701#issue-210601101 Signed-off-by: W. Trevor King --- server/container_create.go | 82 +++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/server/container_create.go b/server/container_create.go index 01820d2d..579e8268 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -395,10 +395,6 @@ func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, ociConfig *v1. } } - if len(kubeCommands) == 0 && len(kubeArgs) == 0 { - return nil, fmt.Errorf("no command specified") - } - processArgs := append(kubeCommands, kubeArgs...) logrus.Debugf("OCI process args %v", processArgs) @@ -1164,40 +1160,53 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } processArgs := []string{} - if containerImageConfig != nil { - processArgs, err := buildOCIProcessArgs(containerConfig, &containerImageConfig.Config) - if err != nil { + if containerImageConfig == nil { + processArgs, err = buildOCIProcessArgs(containerConfig, nil) + } else { + processArgs, err = buildOCIProcessArgs(containerConfig, &containerImageConfig.Config) + } + if err != nil { + return nil, err + } + if len(processArgs) == 0 { + specgen.Spec().Process = nil + } else { + specgen.SetProcessArgs(processArgs) + + envs := mergeEnvs(containerImageConfig, containerConfig.GetEnvs()) + for _, e := range envs { + parts := strings.SplitN(e, "=", 2) + specgen.AddProcessEnv(parts[0], parts[1]) + } + + // Set working directory + // Pick it up from image config first and override if specified in CRI + containerCwd := "/" + if containerImageConfig != nil { + imageCwd := containerImageConfig.Config.WorkingDir + if imageCwd != "" { + containerCwd = imageCwd + } + } + runtimeCwd := containerConfig.WorkingDir + if runtimeCwd != "" { + containerCwd = runtimeCwd + } + specgen.SetProcessCwd(containerCwd) + if err := setupWorkingDirectory(mountPoint, mountLabel, containerCwd); err != nil { + if err1 := s.StorageRuntimeServer().StopContainer(containerID); err1 != nil { + return nil, fmt.Errorf("can't umount container after cwd error %v: %v", err, err1) + } return nil, err } - } - specgen.SetProcessArgs(processArgs) - envs := mergeEnvs(containerImageConfig, containerConfig.GetEnvs()) - for _, e := range envs { - parts := strings.SplitN(e, "=", 2) - specgen.AddProcessEnv(parts[0], parts[1]) - } - - // Set working directory - // Pick it up from image config first and override if specified in CRI - containerCwd := "/" - if containerImageConfig != nil { - imageCwd := containerImageConfig.Config.WorkingDir - if imageCwd != "" { - containerCwd = imageCwd + // Setup user and groups + if linux != nil { + if err = setupContainerUser(&specgen, mountPoint, linux.GetSecurityContext(), containerImageConfig); err != nil { + return nil, err + } } } - runtimeCwd := containerConfig.WorkingDir - if runtimeCwd != "" { - containerCwd = runtimeCwd - } - specgen.SetProcessCwd(containerCwd) - if err := setupWorkingDirectory(mountPoint, mountLabel, containerCwd); err != nil { - if err1 := s.StorageRuntimeServer().StopContainer(containerID); err1 != nil { - return nil, fmt.Errorf("can't umount container after cwd error %v: %v", err, err1) - } - return nil, err - } var secretMounts []rspec.Mount if len(s.config.DefaultMounts) > 0 { @@ -1229,13 +1238,6 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - // Setup user and groups - if linux != nil { - if err = setupContainerUser(&specgen, mountPoint, linux.GetSecurityContext(), containerImageConfig); err != nil { - return nil, err - } - } - // Set up pids limit if pids cgroup is mounted _, err = cgroups.FindCgroupMountpoint("pids") if err == nil { From f41bf4688c4dfa59e2b345336f0ea743b443be7a Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 18 Jan 2018 16:12:44 -0800 Subject: [PATCH 4/4] server/sandbox_run: Drop PodInfraCommand fallback Use buildOCIProcessArgs to calculate process.args considering PauseCommand (if it was set) and the image's configured Entrypoint and Cmd. This improves the previous implementation, which had been ignoring Entrypoint, despite the image-spec recommendations to consider it [1]. There's no reason to assume the pause image contains a /pause command, and it seems more sane to assume that the pause container has configured Entrypoint and Cmd appropriately without our having to repair anything. Now that we're considering Entrypoint, kubernetes/pause (which sets Entrypoint [2]) no longer needs a PauseCommand "override". I've dropped the podContainer.Config nil check because later code is using it without a guard (e.g. podContainer.Config.Config.StopSignal) and that hasn't caused problems. The check was added in c0333b10 (Integrate containers/storage, 2016-10-18, #189), but there was too much going on for a motivational comment in the commit message. When processArgs is empty, I'm setting Process nil (which the runtime spec allows [3]) following createSandboxContainer. If 'start' is called on those containers and errors (as the spec requires [4]), I think that's an "image was broken" problem and not a "cri-o didn't guess the right value" problem. I've shifted the process.args code down within RunPodSandbox where the possible Process nil can clobber the Process object created by SetProcessSelinuxLabel. I didn't see any later configuration that looked like it would create Process (which would require process.args [5]). Down the road we might be able to use image-tools image -> runtime config converter [6]. It's currently a private method with public access restricted to full bundle unpacks. [1]: https://github.com/opencontainers/image-spec/blob/v1.0.1/conversion.md#verbatim-fields [2]: https://github.com/kubernetes/kubernetes/blob/v1.9.2/build/pause/Dockerfile#L18 [3]: https://github.com/opencontainers/runtime-spec/blame/v1.0.1/config.md#L145 [4]: https://github.com/opencontainers/runtime-spec/blame/v1.0.1/runtime.md#L119 [5]: https://github.com/opencontainers/runtime-spec/blame/v1.0.1/config.md#L157 [6]: https://github.com/opencontainers/image-tools/blob/v0.3.0/image/config.go#L68 Signed-off-by: W. Trevor King --- lib/sandbox/sandbox.go | 3 --- server/sandbox_run.go | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/sandbox/sandbox.go b/lib/sandbox/sandbox.go index 7624b072..641e1734 100644 --- a/lib/sandbox/sandbox.go +++ b/lib/sandbox/sandbox.go @@ -168,9 +168,6 @@ const ( // NsRunDir is the default directory in which running network namespaces // are stored NsRunDir = "/var/run/netns" - // PodInfraCommand is the default command when starting a pod infrastructure - // container - PodInfraCommand = "/pause" ) var ( diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 5ba007c2..951e113e 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -186,15 +186,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest // setup defaults for the pod sandbox g.SetRootReadonly(true) - if s.config.PauseCommand == "" { - if podContainer.Config != nil { - g.SetProcessArgs(podContainer.Config.Config.Cmd) - } else { - g.SetProcessArgs([]string{sandbox.PodInfraCommand}) - } - } else { - g.SetProcessArgs([]string{s.config.PauseCommand}) - } // set DNS options if req.GetConfig().GetDnsConfig() != nil { @@ -286,6 +277,20 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.SetProcessSelinuxLabel(processLabel) g.SetLinuxMountLabel(mountLabel) + containerKubeConfig := &pb.ContainerConfig{} + if s.config.PauseCommand != "" { + containerKubeConfig.Command = []string{s.config.PauseCommand} + } + processArgs, err := buildOCIProcessArgs(containerKubeConfig, &podContainer.Config.Config) + if err != nil { + return nil, err + } + if len(processArgs) == 0 { + g.Spec().Process = nil + } else { + g.SetProcessArgs(processArgs) + } + // create shm mount for the pod containers. var shmPath string if securityContext.GetNamespaceOptions().GetHostIpc() {