From fe66d007d3e4744994eb4464d11c2f5c3efad3a4 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 19 Jan 2018 21:32:11 -0800 Subject: [PATCH] *: Add --pid-namespace=[container|pod|pod-container] So crio callers can pick their PID namespace approach. This seems like something that folks might want to configure per-pod, or possibly even per-container, but for now follow --enable-shared-pid-namespace and make it per-crio-daemon. The "Deprecated:" paragraph approach in Go comments is recommended in [1]. The ${parameter:+word} syntax is in POSIX [2], and ${REDIS_IN_INFRA:+!} resolves to ! when REDIS_IN_INFRA is set non-empty and resolves to nothing when REDIS_IN_INFRA is unset. The ADDITIONAL_CRIO_OPTIONS approach is easier to maintain than the old approach with settings for every option, because we no longer need to maintain defaults in two locations (lib/config.go and test/helpers.bash). For this commit, I've only dropped ENABLE_SHARED_PID_NAMESPACE, but we may want to extend this approach to more variables in the future. [1]: https://blog.golang.org/godoc-documenting-go-code [2]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_02 Signed-off-by: W. Trevor King --- cmd/crio/config.go | 13 ++++++++++++- cmd/crio/main.go | 16 ++++++++++++---- docs/crio.8.md | 5 ++++- docs/crio.conf.5.md | 5 ++++- lib/config.go | 15 ++++++++++++++- lib/container_server.go | 4 ++-- oci/container.go | 4 +++- oci/oci.go | 3 +++ server/container_create.go | 23 ++++++++++++++++++----- server/inspect_test.go | 6 +++--- server/sandbox_run.go | 2 +- test/helpers.bash | 4 +--- test/namespaces.bats | 21 +++++++++++++++++++-- 13 files changed, 96 insertions(+), 25 deletions(-) diff --git a/cmd/crio/config.go b/cmd/crio/config.go index 2564baf1..0556e1ad 100644 --- a/cmd/crio/config.go +++ b/cmd/crio/config.go @@ -114,13 +114,24 @@ default_mounts = [ # pids_limit is the number of processes allowed in a container pids_limit = {{ .PidsLimit }} -# enable using a shared PID namespace for containers in a pod +# enable using a shared PID namespace for containers in a pod. +# Deprecated: use pid_namespace = "pod" instead. enable_shared_pid_namespace = {{ .EnableSharedPIDNamespace }} # log_size_max is the max limit for the container log size in bytes. # Negative values indicate that no limit is imposed. log_size_max = {{ .LogSizeMax }} +# Select the PID namespace scope. Choose from 'container' for all +# containers (including pod infra containers) to have sibling PID +# namespaces (the default), 'pod' for all containers to share a +# single, per-pod namespace, or 'pod-container' to have the pod's +# infra container in one PID namespace with the non-infra containers +# in per-container PID namespaces that are children of the pod's infra +# PID namespace . A 'hostPID' Kubernetes pod specification overrides +# this setting. +pid_namespace = "{{ .PIDNamespace }}" + # The "crio.image" table contains settings pertaining to the # management of OCI images. [crio.image] diff --git a/cmd/crio/main.go b/cmd/crio/main.go index a058f296..e9556bbf 100644 --- a/cmd/crio/main.go +++ b/cmd/crio/main.go @@ -129,12 +129,16 @@ func mergeConfig(config *server.Config, ctx *cli.Context) error { if ctx.GlobalIsSet("default-mounts") { config.DefaultMounts = ctx.GlobalStringSlice("default-mounts") } + if ctx.GlobalIsSet("pid-namespace") { + config.PIDNamespace = ctx.GlobalString("pid-namespace") + } else if ctx.GlobalIsSet("enable-shared-pid-namespace") { + if ctx.GlobalBool("enable-shared-pid-namespace") { + config.PIDNamespace = "pod" + } + } if ctx.GlobalIsSet("pids-limit") { config.PidsLimit = ctx.GlobalInt64("pids-limit") } - if ctx.GlobalIsSet("enable-shared-pid-namespace") { - config.EnableSharedPIDNamespace = ctx.GlobalBool("enable-shared-pid-namespace") - } if ctx.GlobalIsSet("log-size-max") { config.LogSizeMax = ctx.GlobalInt64("log-size-max") } @@ -295,6 +299,10 @@ func main() { Name: "cgroup-manager", Usage: "cgroup manager (cgroupfs or systemd)", }, + cli.StringFlag{ + Name: "pid-namespace", + Usage: "select the PID namespace scope (\"container\" default, \"pod\", or \"pod-container\")", + }, cli.Int64Flag{ Name: "pids-limit", Value: lib.DefaultPidsLimit, @@ -302,7 +310,7 @@ func main() { }, cli.BoolFlag{ Name: "enable-shared-pid-namespace", - Usage: "enable using a shared PID namespace for containers in a pod", + Usage: "enable using a shared PID namespace for containers in a pod. Deprecated: use --pid-namespace=pod instead.", }, cli.Int64Flag{ Name: "log-size-max", diff --git a/docs/crio.8.md b/docs/crio.8.md index 8408978b..55cbca94 100644 --- a/docs/crio.8.md +++ b/docs/crio.8.md @@ -23,6 +23,7 @@ crio [--log-level value] [--pause-command=[value]] [--pause-image=[value]] +[--pid-namespace=[value]] [--registry=[value]] [--root=[value]] [--runroot=[value]] @@ -92,9 +93,11 @@ crio [GLOBAL OPTIONS] config [OPTIONS] **--pause-image**="": Image which contains the pause executable (default: "kubernetes/pause") +**--pid-namespace**="": Select the PID namespace scope. Choose from `container` for all containers (including pod infra containers) to have sibling PID namespaces (the default), `pod` for all containers to share a single, per-pod namespace, or `pod-container` to have the pod's infra container in one PID namespace with the non-infra containers in per-container PID namespaces that are children of the pod's infra PID namespace . A `hostPID` Kubernetes pod specification overrides this setting. + **--pids-limit**="": Maximum number of processes allowed in a container (default: 1024) -**--enable-shared-pid-namespace**="": Enable using a shared PID namespace for containers in a pod (default: false) +**--enable-shared-pid-namespace**="": Enable using a shared PID namespace for containers in a pod. Deprecated: use `--pid-namespace=pod` instead. **--root**="": The crio root dir (default: "/var/lib/containers/storage") diff --git a/docs/crio.conf.5.md b/docs/crio.conf.5.md index 708f26e7..e9f04606 100644 --- a/docs/crio.conf.5.md +++ b/docs/crio.conf.5.md @@ -84,11 +84,14 @@ Example: If it is positive, it must be >= 8192 (to match/exceed conmon read buffer). The file is truncated and re-opened so the limit is never exceeded. +**pid_namespace**="" + Select the PID namespace scope. Choose from `container` for all containers (including pod infra containers) to have sibling PID namespaces (the default), `pod` for all containers to share a single, per-pod namespace, or `pod-container` to have the pod's infra container in one PID namespace with the non-infra containers in per-container PID namespaces that are children of the pod's infra PID namespace . A `hostPID` Kubernetes pod specification overrides this setting. + **pids_limit**="" Maximum number of processes allowed in a container (default: 1024) **enable_shared_pid_namespace**="" - Enable using a shared PID namespace for containers in a pod (default: false) + Enable using a shared PID namespace for containers in a pod. Deprecated: use `pid_namespace = "pod"` instead. **runtime**="" OCI runtime path (default: "/usr/bin/runc") diff --git a/lib/config.go b/lib/config.go index 6a63b2b0..38226501 100644 --- a/lib/config.go +++ b/lib/config.go @@ -121,7 +121,10 @@ type RuntimeConfig struct { // NoPivot instructs the runtime to not use `pivot_root`, but instead use `MS_MOVE` NoPivot bool `toml:"no_pivot"` - // EnableSharePidNamespace instructs the runtime to enable share pid namespace + // EnableSharePidNamespace instructs the runtime to enable share pid + // namespace. + // + // Deprecated: use PIDNamespace = "pod" instead. EnableSharedPIDNamespace bool `toml:"enable_shared_pid_namespace"` // Conmon is the path to conmon binary, used for managing the runtime. @@ -155,6 +158,16 @@ type RuntimeConfig struct { // Hooks List of hooks to run with container Hooks map[string]HookParams + // PIDNamespace selects the PID namespace scope. Choose from + // 'container' for all containers (including pod infra containers) + // to have sibling PID namespaces (the default), 'pod' for all + // containers to share a single, per-pod namespace, or + // 'pod-container' to have the pod's infra container in one PID + // namespace with the non-infra containers in per-container PID + // namespaces that are children of the pod's infra PID namespace . + // A 'hostPID' Kubernetes pod specification overrides this setting. + PIDNamespace string `toml:"pid_namespace"` // FIXME: should this be an enum type? + // PidsLimit is the number of processes each container is restricted to // by the cgroup process number controller. PidsLimit int64 `toml:"pids_limit"` diff --git a/lib/container_server.go b/lib/container_server.go index 9a4704b7..a1993bba 100644 --- a/lib/container_server.go +++ b/lib/container_server.go @@ -389,7 +389,7 @@ func (c *ContainerServer) LoadSandbox(id string) error { return err } - scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.NetNs(), "", labels, m.Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) if err != nil { return err } @@ -513,7 +513,7 @@ func (c *ContainerServer) LoadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, img, imgName, imgRef, &metadata, sb.ID(), tty, stdin, stdinOnce, sb.Privileged(), sb.Trusted(), containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.NetNs(), "", labels, m.Annotations, kubeAnnotations, img, imgName, imgRef, &metadata, sb.ID(), tty, stdin, stdinOnce, sb.Privileged(), sb.Trusted(), containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) if err != nil { return err } diff --git a/oci/container.go b/oci/container.go index c71152bf..cb40dd34 100644 --- a/oci/container.go +++ b/oci/container.go @@ -31,6 +31,7 @@ type Container struct { image string sandbox string netns ns.NetNS + pidNamespace string terminal bool stdin bool stdinOnce bool @@ -71,7 +72,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, crioAnnotations map[string]string, annotations map[string]string, image string, imageName string, imageRef string, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, pidNamespace string, labels map[string]string, crioAnnotations map[string]string, annotations map[string]string, image string, imageName string, imageRef string, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) { state := &ContainerState{} state.Created = created c := &Container{ @@ -82,6 +83,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net labels: labels, sandbox: sandbox, netns: netns, + pidNamespace: pidNamespace, terminal: terminal, stdin: stdin, stdinOnce: stdinOnce, diff --git a/oci/oci.go b/oci/oci.go index 658079a3..f9523539 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -186,6 +186,9 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) (err error) if r.noPivot { args = append(args, "--no-pivot") } + if c.pidNamespace != "" { + args = append(args, "--pid-namespace", c.pidNamespace) + } if c.terminal { args = append(args, "-t") } else if c.stdin { diff --git a/server/container_create.go b/server/container_create.go index a4652cf3..9f4fd41d 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -981,13 +981,26 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } + var pidNamespace string if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() { // kubernetes PodSpec specify to use Host PID namespace specgen.RemoveLinuxNamespace(string(rspec.PIDNamespace)) - } else if s.config.EnableSharedPIDNamespace { - // share Pod PID namespace - pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", podInfraState.Pid) - if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil { + } else { + if s.config.EnableSharedPIDNamespace && len(s.config.PIDNamespace) == 0 { + s.config.PIDNamespace = "pod" + } + podPIDNsPath := fmt.Sprintf("/proc/%d/ns/pid", podInfraState.Pid) + var pidNsPath string + switch s.config.PIDNamespace { + case "container": + case "pod": + pidNsPath = podPIDNsPath + case "pod-container": + pidNamespace = podPIDNsPath + default: + return nil, fmt.Errorf("unrecognized PID namespace configuration: %s", s.config.PIDNamespace) + } + if err = specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil { return nil, err } } @@ -1268,7 +1281,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, crioAnnotations := specgen.Spec().Annotations - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), pidNamespace, labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal) if err != nil { return nil, err } diff --git a/server/inspect_test.go b/server/inspect_test.go index 7246ef86..93874d54 100644 --- a/server/inspect_test.go +++ b/server/inspect_test.go @@ -67,7 +67,7 @@ func TestGetContainerInfo(t *testing.T) { "io.kubernetes.test1": "value1", } getContainerFunc := func(id string) *oci.Container { - container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "image", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "image", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") if err != nil { t.Fatal(err) } @@ -184,7 +184,7 @@ func TestGetContainerInfoCtrStateNil(t *testing.T) { labels := map[string]string{} annotations := map[string]string{} getContainerFunc := func(id string) *oci.Container { - container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") if err != nil { t.Fatal(err) } @@ -215,7 +215,7 @@ func TestGetContainerInfoSandboxNotFound(t *testing.T) { labels := map[string]string{} annotations := map[string]string{} getContainerFunc := func(id string) *oci.Container { - container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") if err != nil { t.Fatal(err) } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 5ba007c2..d9ef6c58 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -499,7 +499,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation(annotations.HostnamePath, hostnamePath) sb.AddHostnamePath(hostnamePath) - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), "", labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal) if err != nil { return nil, err } diff --git a/test/helpers.bash b/test/helpers.bash index a0c715e1..cad56b9a 100644 --- a/test/helpers.bash +++ b/test/helpers.bash @@ -56,8 +56,6 @@ IMAGE_VOLUMES=${IMAGE_VOLUMES:-mkdir} PIDS_LIMIT=${PIDS_LIMIT:-1024} # Log size max limit LOG_SIZE_MAX_LIMIT=${LOG_SIZE_MAX_LIMIT:--1} -# enable share container pid namespace -ENABLE_SHARED_PID_NAMESPACE=${ENABLE_SHARED_PID_NAMESPACE:-false} TESTDIR=$(mktemp -d) @@ -217,7 +215,7 @@ function start_crio() { "$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/mrunalp/image-volume-test:latest --import-from=dir:"$ARTIFACTS_PATH"/image-volume-test-image --signature-policy="$INTEGRATION_ROOT"/policy.json "$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/library/busybox:latest --import-from=dir:"$ARTIFACTS_PATH"/busybox-image --signature-policy="$INTEGRATION_ROOT"/policy.json "$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/runcom/stderr-test:latest --import-from=dir:"$ARTIFACTS_PATH"/stderr-test --signature-policy="$INTEGRATION_ROOT"/policy.json - "$CRIO_BINARY" ${DEFAULT_MOUNTS_OPTS} ${HOOKS_OPTS} --conmon "$CONMON_BINARY" --listen "$CRIO_SOCKET" --cgroup-manager "$CGROUP_MANAGER" --registry "docker.io" --runtime "$RUNTIME_BINARY" --root "$TESTDIR/crio" --runroot "$TESTDIR/crio-run" $STORAGE_OPTIONS --seccomp-profile "$seccomp" --apparmor-profile "$apparmor" --cni-config-dir "$CRIO_CNI_CONFIG" --cni-plugin-dir "$CRIO_CNI_PLUGIN" --signature-policy "$INTEGRATION_ROOT"/policy.json --image-volumes "$IMAGE_VOLUMES" --pids-limit "$PIDS_LIMIT" --enable-shared-pid-namespace=${ENABLE_SHARED_PID_NAMESPACE} --log-size-max "$LOG_SIZE_MAX_LIMIT" --config /dev/null config >$CRIO_CONFIG + "$CRIO_BINARY" ${DEFAULT_MOUNTS_OPTS} ${HOOKS_OPTS} --conmon "$CONMON_BINARY" --listen "$CRIO_SOCKET" --cgroup-manager "$CGROUP_MANAGER" --registry "docker.io" --runtime "$RUNTIME_BINARY" --root "$TESTDIR/crio" --runroot "$TESTDIR/crio-run" $STORAGE_OPTIONS --seccomp-profile "$seccomp" --apparmor-profile "$apparmor" --cni-config-dir "$CRIO_CNI_CONFIG" --cni-plugin-dir "$CRIO_CNI_PLUGIN" --signature-policy "$INTEGRATION_ROOT"/policy.json --image-volumes "$IMAGE_VOLUMES" --pids-limit "$PIDS_LIMIT" --log-size-max "$LOG_SIZE_MAX_LIMIT" $ADDITIONAL_CRIO_OPTIONS --config /dev/null config >$CRIO_CONFIG # Prepare the CNI configuration files, we're running with non host networking by default if [[ -n "$4" ]]; then diff --git a/test/namespaces.bats b/test/namespaces.bats index 033cbab2..60b0b0da 100644 --- a/test/namespaces.bats +++ b/test/namespaces.bats @@ -25,6 +25,11 @@ function pid_namespace_test() { [ "$status" -eq 0 ] [[ "$output" =~ "${EXPECTED_INIT:-redis}" ]] + run crictl exec --sync "$pod_id" cat /proc/*/cmdline + echo "$output" + [ "$status" -eq 0 ] + [[ ${REDIS_IN_INFRA:+!} "$output" =~ "redis" ]] + run crictl stops "$pod_id" echo "$output" [ "$status" -eq 0 ] @@ -36,10 +41,22 @@ function pid_namespace_test() { stop_crio } +@test "container pid namespace" { + ADDITIONAL_CRIO_OPTIONS=--pid-namespace=container pid_namespace_test +} + +@test "pod pid namespace" { + ADDITIONAL_CRIO_OPTIONS=--pid-namespace=pod REDIS_IN_INFRA=1 EXPECTED_INIT=pause pid_namespace_test +} + +@test "pod-container pid namespace" { + ADDITIONAL_CRIO_OPTIONS=--pid-namespace=pod REDIS_IN_INFRA=1 pid_namespace_test +} + @test "pod disable shared pid namespace" { - ENABLE_SHARED_PID_NAMESPACE=false pid_namespace_test + ADDITIONAL_CRIO_OPTIONS=--enable-shared-pid-namespace=false pid_namespace_test } @test "pod enable shared pid namespace" { - ENABLE_SHARED_PID_NAMESPACE=true EXPECTED_INIT=pause pid_namespace_test + ADDITIONAL_CRIO_OPTIONS=--enable-shared-pid-namespace=true EXPECTED_INIT=pause pid_namespace_test }