*: Add --pid-namespace=[container|pod|pod-container]

So crio callers can pick their PID namespace approach.  This seems
like something that folks might want to configure per-pod, or possibly
even per-container, but for now follow --enable-shared-pid-namespace
and make it per-crio-daemon.

The "Deprecated:" paragraph approach in Go comments is recommended in
[1].

The ${parameter:+word} syntax is in POSIX [2], and
${REDIS_IN_INFRA:+!}  resolves to ! when REDIS_IN_INFRA is set
non-empty and resolves to nothing when REDIS_IN_INFRA is unset.

The ADDITIONAL_CRIO_OPTIONS approach is easier to maintain than the
old approach with settings for every option, because we no longer need
to maintain defaults in two locations (lib/config.go and
test/helpers.bash).  For this commit, I've only dropped
ENABLE_SHARED_PID_NAMESPACE, but we may want to extend this approach
to more variables in the future.

[1]: https://blog.golang.org/godoc-documenting-go-code
[2]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_02

Signed-off-by: W. Trevor King <wking@tremily.us>
This commit is contained in:
W. Trevor King 2018-01-19 21:32:11 -08:00
parent 6bf8f2e920
commit fe66d007d3
13 changed files with 96 additions and 25 deletions

View file

@ -114,13 +114,24 @@ default_mounts = [
# pids_limit is the number of processes allowed in a container
pids_limit = {{ .PidsLimit }}
# enable using a shared PID namespace for containers in a pod
# enable using a shared PID namespace for containers in a pod.
# Deprecated: use pid_namespace = "pod" instead.
enable_shared_pid_namespace = {{ .EnableSharedPIDNamespace }}
# log_size_max is the max limit for the container log size in bytes.
# Negative values indicate that no limit is imposed.
log_size_max = {{ .LogSizeMax }}
# Select the PID namespace scope. Choose from 'container' for all
# containers (including pod infra containers) to have sibling PID
# namespaces (the default), 'pod' for all containers to share a
# single, per-pod namespace, or 'pod-container' to have the pod's
# infra container in one PID namespace with the non-infra containers
# in per-container PID namespaces that are children of the pod's infra
# PID namespace . A 'hostPID' Kubernetes pod specification overrides
# this setting.
pid_namespace = "{{ .PIDNamespace }}"
# The "crio.image" table contains settings pertaining to the
# management of OCI images.
[crio.image]

View file

@ -129,12 +129,16 @@ func mergeConfig(config *server.Config, ctx *cli.Context) error {
if ctx.GlobalIsSet("default-mounts") {
config.DefaultMounts = ctx.GlobalStringSlice("default-mounts")
}
if ctx.GlobalIsSet("pid-namespace") {
config.PIDNamespace = ctx.GlobalString("pid-namespace")
} else if ctx.GlobalIsSet("enable-shared-pid-namespace") {
if ctx.GlobalBool("enable-shared-pid-namespace") {
config.PIDNamespace = "pod"
}
}
if ctx.GlobalIsSet("pids-limit") {
config.PidsLimit = ctx.GlobalInt64("pids-limit")
}
if ctx.GlobalIsSet("enable-shared-pid-namespace") {
config.EnableSharedPIDNamespace = ctx.GlobalBool("enable-shared-pid-namespace")
}
if ctx.GlobalIsSet("log-size-max") {
config.LogSizeMax = ctx.GlobalInt64("log-size-max")
}
@ -295,6 +299,10 @@ func main() {
Name: "cgroup-manager",
Usage: "cgroup manager (cgroupfs or systemd)",
},
cli.StringFlag{
Name: "pid-namespace",
Usage: "select the PID namespace scope (\"container\" default, \"pod\", or \"pod-container\")",
},
cli.Int64Flag{
Name: "pids-limit",
Value: lib.DefaultPidsLimit,
@ -302,7 +310,7 @@ func main() {
},
cli.BoolFlag{
Name: "enable-shared-pid-namespace",
Usage: "enable using a shared PID namespace for containers in a pod",
Usage: "enable using a shared PID namespace for containers in a pod. Deprecated: use --pid-namespace=pod instead.",
},
cli.Int64Flag{
Name: "log-size-max",

View file

@ -23,6 +23,7 @@ crio
[--log-level value]
[--pause-command=[value]]
[--pause-image=[value]]
[--pid-namespace=[value]]
[--registry=[value]]
[--root=[value]]
[--runroot=[value]]
@ -92,9 +93,11 @@ crio [GLOBAL OPTIONS] config [OPTIONS]
**--pause-image**="": Image which contains the pause executable (default: "kubernetes/pause")
**--pid-namespace**="": Select the PID namespace scope. Choose from `container` for all containers (including pod infra containers) to have sibling PID namespaces (the default), `pod` for all containers to share a single, per-pod namespace, or `pod-container` to have the pod's infra container in one PID namespace with the non-infra containers in per-container PID namespaces that are children of the pod's infra PID namespace . A `hostPID` Kubernetes pod specification overrides this setting.
**--pids-limit**="": Maximum number of processes allowed in a container (default: 1024)
**--enable-shared-pid-namespace**="": Enable using a shared PID namespace for containers in a pod (default: false)
**--enable-shared-pid-namespace**="": Enable using a shared PID namespace for containers in a pod. Deprecated: use `--pid-namespace=pod` instead.
**--root**="": The crio root dir (default: "/var/lib/containers/storage")

View file

@ -84,11 +84,14 @@ Example:
If it is positive, it must be >= 8192 (to match/exceed conmon read buffer).
The file is truncated and re-opened so the limit is never exceeded.
**pid_namespace**=""
Select the PID namespace scope. Choose from `container` for all containers (including pod infra containers) to have sibling PID namespaces (the default), `pod` for all containers to share a single, per-pod namespace, or `pod-container` to have the pod's infra container in one PID namespace with the non-infra containers in per-container PID namespaces that are children of the pod's infra PID namespace . A `hostPID` Kubernetes pod specification overrides this setting.
**pids_limit**=""
Maximum number of processes allowed in a container (default: 1024)
**enable_shared_pid_namespace**=""
Enable using a shared PID namespace for containers in a pod (default: false)
Enable using a shared PID namespace for containers in a pod. Deprecated: use `pid_namespace = "pod"` instead.
**runtime**=""
OCI runtime path (default: "/usr/bin/runc")

View file

@ -121,7 +121,10 @@ type RuntimeConfig struct {
// NoPivot instructs the runtime to not use `pivot_root`, but instead use `MS_MOVE`
NoPivot bool `toml:"no_pivot"`
// EnableSharePidNamespace instructs the runtime to enable share pid namespace
// EnableSharePidNamespace instructs the runtime to enable share pid
// namespace.
//
// Deprecated: use PIDNamespace = "pod" instead.
EnableSharedPIDNamespace bool `toml:"enable_shared_pid_namespace"`
// Conmon is the path to conmon binary, used for managing the runtime.
@ -155,6 +158,16 @@ type RuntimeConfig struct {
// Hooks List of hooks to run with container
Hooks map[string]HookParams
// PIDNamespace selects the PID namespace scope. Choose from
// 'container' for all containers (including pod infra containers)
// to have sibling PID namespaces (the default), 'pod' for all
// containers to share a single, per-pod namespace, or
// 'pod-container' to have the pod's infra container in one PID
// namespace with the non-infra containers in per-container PID
// namespaces that are children of the pod's infra PID namespace .
// A 'hostPID' Kubernetes pod specification overrides this setting.
PIDNamespace string `toml:"pid_namespace"` // FIXME: should this be an enum type?
// PidsLimit is the number of processes each container is restricted to
// by the cgroup process number controller.
PidsLimit int64 `toml:"pids_limit"`

View file

@ -389,7 +389,7 @@ func (c *ContainerServer) LoadSandbox(id string) error {
return err
}
scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.NetNs(), "", labels, m.Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
if err != nil {
return err
}
@ -513,7 +513,7 @@ func (c *ContainerServer) LoadContainer(id string) error {
return err
}
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, img, imgName, imgRef, &metadata, sb.ID(), tty, stdin, stdinOnce, sb.Privileged(), sb.Trusted(), containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.NetNs(), "", labels, m.Annotations, kubeAnnotations, img, imgName, imgRef, &metadata, sb.ID(), tty, stdin, stdinOnce, sb.Privileged(), sb.Trusted(), containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
if err != nil {
return err
}

View file

@ -31,6 +31,7 @@ type Container struct {
image string
sandbox string
netns ns.NetNS
pidNamespace string
terminal bool
stdin bool
stdinOnce bool
@ -71,7 +72,7 @@ type ContainerState struct {
}
// NewContainer creates a container object.
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, crioAnnotations map[string]string, annotations map[string]string, image string, imageName string, imageRef string, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) {
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, pidNamespace string, labels map[string]string, crioAnnotations map[string]string, annotations map[string]string, image string, imageName string, imageRef string, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) {
state := &ContainerState{}
state.Created = created
c := &Container{
@ -82,6 +83,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net
labels: labels,
sandbox: sandbox,
netns: netns,
pidNamespace: pidNamespace,
terminal: terminal,
stdin: stdin,
stdinOnce: stdinOnce,

View file

@ -186,6 +186,9 @@ func (r *Runtime) CreateContainer(c *Container, cgroupParent string) (err error)
if r.noPivot {
args = append(args, "--no-pivot")
}
if c.pidNamespace != "" {
args = append(args, "--pid-namespace", c.pidNamespace)
}
if c.terminal {
args = append(args, "-t")
} else if c.stdin {

View file

@ -981,13 +981,26 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
return nil, err
}
var pidNamespace string
if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
// kubernetes PodSpec specify to use Host PID namespace
specgen.RemoveLinuxNamespace(string(rspec.PIDNamespace))
} else if s.config.EnableSharedPIDNamespace {
// share Pod PID namespace
pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", podInfraState.Pid)
if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil {
} else {
if s.config.EnableSharedPIDNamespace && len(s.config.PIDNamespace) == 0 {
s.config.PIDNamespace = "pod"
}
podPIDNsPath := fmt.Sprintf("/proc/%d/ns/pid", podInfraState.Pid)
var pidNsPath string
switch s.config.PIDNamespace {
case "container":
case "pod":
pidNsPath = podPIDNsPath
case "pod-container":
pidNamespace = podPIDNsPath
default:
return nil, fmt.Errorf("unrecognized PID namespace configuration: %s", s.config.PIDNamespace)
}
if err = specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil {
return nil, err
}
}
@ -1268,7 +1281,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
crioAnnotations := specgen.Spec().Annotations
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), pidNamespace, labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
if err != nil {
return nil, err
}

View file

@ -67,7 +67,7 @@ func TestGetContainerInfo(t *testing.T) {
"io.kubernetes.test1": "value1",
}
getContainerFunc := func(id string) *oci.Container {
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "image", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "image", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
if err != nil {
t.Fatal(err)
}
@ -184,7 +184,7 @@ func TestGetContainerInfoCtrStateNil(t *testing.T) {
labels := map[string]string{}
annotations := map[string]string{}
getContainerFunc := func(id string) *oci.Container {
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
if err != nil {
t.Fatal(err)
}
@ -215,7 +215,7 @@ func TestGetContainerInfoSandboxNotFound(t *testing.T) {
labels := map[string]string{}
annotations := map[string]string{}
getContainerFunc := func(id string) *oci.Container {
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, "", labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
if err != nil {
t.Fatal(err)
}

View file

@ -499,7 +499,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
g.AddAnnotation(annotations.HostnamePath, hostnamePath)
sb.AddHostnamePath(hostnamePath)
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal)
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), "", labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal)
if err != nil {
return nil, err
}

View file

@ -56,8 +56,6 @@ IMAGE_VOLUMES=${IMAGE_VOLUMES:-mkdir}
PIDS_LIMIT=${PIDS_LIMIT:-1024}
# Log size max limit
LOG_SIZE_MAX_LIMIT=${LOG_SIZE_MAX_LIMIT:--1}
# enable share container pid namespace
ENABLE_SHARED_PID_NAMESPACE=${ENABLE_SHARED_PID_NAMESPACE:-false}
TESTDIR=$(mktemp -d)
@ -217,7 +215,7 @@ function start_crio() {
"$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/mrunalp/image-volume-test:latest --import-from=dir:"$ARTIFACTS_PATH"/image-volume-test-image --signature-policy="$INTEGRATION_ROOT"/policy.json
"$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/library/busybox:latest --import-from=dir:"$ARTIFACTS_PATH"/busybox-image --signature-policy="$INTEGRATION_ROOT"/policy.json
"$COPYIMG_BINARY" --root "$TESTDIR/crio" $STORAGE_OPTIONS --runroot "$TESTDIR/crio-run" --image-name=docker.io/runcom/stderr-test:latest --import-from=dir:"$ARTIFACTS_PATH"/stderr-test --signature-policy="$INTEGRATION_ROOT"/policy.json
"$CRIO_BINARY" ${DEFAULT_MOUNTS_OPTS} ${HOOKS_OPTS} --conmon "$CONMON_BINARY" --listen "$CRIO_SOCKET" --cgroup-manager "$CGROUP_MANAGER" --registry "docker.io" --runtime "$RUNTIME_BINARY" --root "$TESTDIR/crio" --runroot "$TESTDIR/crio-run" $STORAGE_OPTIONS --seccomp-profile "$seccomp" --apparmor-profile "$apparmor" --cni-config-dir "$CRIO_CNI_CONFIG" --cni-plugin-dir "$CRIO_CNI_PLUGIN" --signature-policy "$INTEGRATION_ROOT"/policy.json --image-volumes "$IMAGE_VOLUMES" --pids-limit "$PIDS_LIMIT" --enable-shared-pid-namespace=${ENABLE_SHARED_PID_NAMESPACE} --log-size-max "$LOG_SIZE_MAX_LIMIT" --config /dev/null config >$CRIO_CONFIG
"$CRIO_BINARY" ${DEFAULT_MOUNTS_OPTS} ${HOOKS_OPTS} --conmon "$CONMON_BINARY" --listen "$CRIO_SOCKET" --cgroup-manager "$CGROUP_MANAGER" --registry "docker.io" --runtime "$RUNTIME_BINARY" --root "$TESTDIR/crio" --runroot "$TESTDIR/crio-run" $STORAGE_OPTIONS --seccomp-profile "$seccomp" --apparmor-profile "$apparmor" --cni-config-dir "$CRIO_CNI_CONFIG" --cni-plugin-dir "$CRIO_CNI_PLUGIN" --signature-policy "$INTEGRATION_ROOT"/policy.json --image-volumes "$IMAGE_VOLUMES" --pids-limit "$PIDS_LIMIT" --log-size-max "$LOG_SIZE_MAX_LIMIT" $ADDITIONAL_CRIO_OPTIONS --config /dev/null config >$CRIO_CONFIG
# Prepare the CNI configuration files, we're running with non host networking by default
if [[ -n "$4" ]]; then

View file

@ -25,6 +25,11 @@ function pid_namespace_test() {
[ "$status" -eq 0 ]
[[ "$output" =~ "${EXPECTED_INIT:-redis}" ]]
run crictl exec --sync "$pod_id" cat /proc/*/cmdline
echo "$output"
[ "$status" -eq 0 ]
[[ ${REDIS_IN_INFRA:+!} "$output" =~ "redis" ]]
run crictl stops "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
@ -36,10 +41,22 @@ function pid_namespace_test() {
stop_crio
}
@test "container pid namespace" {
ADDITIONAL_CRIO_OPTIONS=--pid-namespace=container pid_namespace_test
}
@test "pod pid namespace" {
ADDITIONAL_CRIO_OPTIONS=--pid-namespace=pod REDIS_IN_INFRA=1 EXPECTED_INIT=pause pid_namespace_test
}
@test "pod-container pid namespace" {
ADDITIONAL_CRIO_OPTIONS=--pid-namespace=pod REDIS_IN_INFRA=1 pid_namespace_test
}
@test "pod disable shared pid namespace" {
ENABLE_SHARED_PID_NAMESPACE=false pid_namespace_test
ADDITIONAL_CRIO_OPTIONS=--enable-shared-pid-namespace=false pid_namespace_test
}
@test "pod enable shared pid namespace" {
ENABLE_SHARED_PID_NAMESPACE=true EXPECTED_INIT=pause pid_namespace_test
ADDITIONAL_CRIO_OPTIONS=--enable-shared-pid-namespace=true EXPECTED_INIT=pause pid_namespace_test
}