diff --git a/cmd/crio/config.go b/cmd/crio/config.go index e71969b5..840882d7 100644 --- a/cmd/crio/config.go +++ b/cmd/crio/config.go @@ -45,14 +45,33 @@ stream_port = "{{ .StreamPort }}" # runtime used and options for how to set up and manage the OCI runtime. [crio.runtime] -# runtime is a path to the OCI runtime which crio will be using. +# runtime is the OCI compatible runtime used for trusted container workloads. +# This is a mandatory setting as this runtime will be the default one +# and will also be used for untrusted container workloads if +# runtime_untrusted_workload is not set. runtime = "{{ .Runtime }}" -# runtime_host_privileged is a path to the OCI runtime which crio -# will be using for host privileged operations. -# If this string is empty, crio will not try to use the "runtime" -# for all operations. -runtime_host_privileged = "{{ .RuntimeHostPrivileged }}" +# runtime_untrusted_workload is the OCI compatible runtime used for untrusted +# container workloads. This is an optional setting, except if +# default_workload_trust is set to "untrusted". +runtime_untrusted_workload = "{{ .RuntimeUntrustedWorkload }}" + +# default_workload_trust is the default level of trust crio puts in container +# workloads. It can either be "trusted" or "untrusted", and the default +# is "trusted". +# Containers can be run through different container runtimes, depending on +# the trust hints we receive from kubelet: +# - If kubelet tags a container workload as untrusted, crio will try first to +# run it through the untrusted container workload runtime. If it is not set, +# crio will use the trusted runtime. +# - If kubelet does not provide any information about the container workload trust +# level, the selected runtime will depend on the default_workload_trust setting. +# If it is set to "untrusted", then all containers except for the host privileged +# ones, will be run by the runtime_untrusted_workload runtime. 
Host privileged +# containers are by definition trusted and will always use the trusted container +# runtime. If default_workload_trust is set to "trusted", crio will use the trusted +# container runtime for all containers. +default_workload_trust = "{{ .DefaultWorkloadTrust }}" # conmon is the path to conmon binary, used for managing the runtime. conmon = "{{ .Conmon }}" diff --git a/oci/container.go b/oci/container.go index 6b99e487..5cedd833 100644 --- a/oci/container.go +++ b/oci/container.go @@ -34,6 +34,7 @@ type Container struct { stdin bool stdinOnce bool privileged bool + trusted bool state *ContainerState metadata *pb.ContainerMetadata opLock sync.Mutex @@ -56,7 +57,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, dir string, created time.Time, stopSignal string) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) { state := &ContainerState{} state.Created = created c := &Container{ @@ -71,6 +72,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net stdin: stdin, stdinOnce: stdinOnce, privileged: privileged, + trusted: trusted, metadata: metadata, annotations: annotations, image: image, diff --git a/oci/oci.go b/oci/oci.go index 8b33141c..d965f6d6 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -31,26 +31,28 @@ const ( ) // New creates a new Runtime with options provided -func New(runtimePath 
string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { +func New(runtimeTrustedPath string, runtimeUntrustedPath string, trustLevel string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { r := &Runtime{ - name: filepath.Base(runtimePath), - path: runtimePath, - privilegedPath: runtimeHostPrivilegedPath, - conmonPath: conmonPath, - conmonEnv: conmonEnv, - cgroupManager: cgroupManager, + name: filepath.Base(runtimeTrustedPath), + trustedPath: runtimeTrustedPath, + untrustedPath: runtimeUntrustedPath, + trustLevel: trustLevel, + conmonPath: conmonPath, + conmonEnv: conmonEnv, + cgroupManager: cgroupManager, } return r, nil } // Runtime stores the information about a oci runtime type Runtime struct { - name string - path string - privilegedPath string - conmonPath string - conmonEnv []string - cgroupManager string + name string + trustedPath string + untrustedPath string + trustLevel string + conmonPath string + conmonEnv []string + cgroupManager string } // syncInfo is used to return data from monitor process to daemon @@ -70,19 +72,41 @@ func (r *Runtime) Name() string { } // Path returns the full path the OCI Runtime executable. -// Depending if the container is privileged, it will return -// the privileged runtime or not. +// Depending if the container is privileged and/or trusted, +// this will return either the trusted or untrusted runtime path. func (r *Runtime) Path(c *Container) string { - if c.privileged && r.privilegedPath != "" { - return r.privilegedPath + if !c.trusted { + // We have an explicitly untrusted container. + if c.privileged { + logrus.Warnf("Running an untrusted but privileged container") + return r.trustedPath + } + + if r.untrustedPath != "" { + return r.untrustedPath + } + + return r.trustedPath } - return r.path + // Our container is trusted. Let's look at the configured trust level. 
+ if r.trustLevel == "trusted" { + return r.trustedPath + } + + // Our container is trusted, but we are running untrusted. + // We will use the untrusted container runtime if it's set + // and if it's not a privileged container. + if c.privileged || r.untrustedPath == "" { + return r.trustedPath + } + + return r.untrustedPath } // Version returns the version of the OCI Runtime func (r *Runtime) Version() (string, error) { - runtimeVersion, err := getOCIVersion(r.path, "-v") + runtimeVersion, err := getOCIVersion(r.trustedPath, "-v") if err != nil { return "", err } diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index 40916a25..26ead571 100644 --- a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -52,6 +52,9 @@ const ( // ShmPath is the shared memory path annotation ShmPath = "io.kubernetes.cri-o.ShmPath" + // TrustedSandbox is the annotation for trusted sandboxes + TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox" + // TTY is the terminal path annotation TTY = "io.kubernetes.cri-o.TTY" diff --git a/server/config.go b/server/config.go index 99cac43e..32a9a2a3 100644 --- a/server/config.go +++ b/server/config.go @@ -75,14 +75,33 @@ type APIConfig struct { // RuntimeConfig represents the "crio.runtime" TOML config table. type RuntimeConfig struct { - // Runtime is a path to the OCI runtime which crio will be using. Currently - // the only known working choice is runC, simply because the OCI has not - // yet merged a CLI API (so we assume runC's API here). + // Runtime is the OCI compatible runtime used for trusted container workloads. + // This is a mandatory setting as this runtime will be the default one and + // will also be used for untrusted container workloads if + // RuntimeUntrustedWorkload is not set. Runtime string `toml:"runtime"` - // RuntimeHostPrivileged is a path to the OCI runtime which crio will be - // using for host privileged operations. 
- RuntimeHostPrivileged string `toml:"runtime_host_privileged"` + // RuntimeUntrustedWorkload is the OCI compatible runtime used for untrusted + // container workloads. This is an optional setting, except if + // DefaultWorkloadTrust is set to "untrusted". + RuntimeUntrustedWorkload string `toml:"runtime_untrusted_workload"` + + // DefaultWorkloadTrust is the default level of trust crio puts in container + // workloads. This can either be "trusted" or "untrusted", and the default + // is "trusted". + // Containers can be run through different container runtimes, depending on + // the trust hints we receive from kubelet: + // - If kubelet tags a container workload as untrusted, crio will try first + // to run it through the untrusted container workload runtime. If it is not + // set, crio will use the trusted runtime. + // - If kubelet does not provide any information about the container workload trust + // level, the selected runtime will depend on the DefaultWorkloadTrust setting. + // If it is set to "untrusted", then all containers except for the host privileged + // ones, will be run by the RuntimeUntrustedWorkload runtime. Host privileged + // containers are by definition trusted and will always use the trusted container + // runtime. If DefaultWorkloadTrust is set to "trusted", crio will use the trusted + // container runtime for all containers. + DefaultWorkloadTrust string `toml:"default_workload_trust"` // Conmon is the path to conmon binary, used for managing the runtime. 
Conmon string `toml:"conmon"` @@ -218,9 +237,11 @@ func DefaultConfig() *Config { StreamPort: "10010", }, RuntimeConfig: RuntimeConfig{ - Runtime: "/usr/bin/runc", - RuntimeHostPrivileged: "", - Conmon: conmonPath, + Runtime: "/usr/bin/runc", + RuntimeUntrustedWorkload: "", + DefaultWorkloadTrust: "trusted", + + Conmon: conmonPath, ConmonEnv: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, diff --git a/server/container_create.go b/server/container_create.go index 57624d08..c7ee28b8 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -673,7 +673,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, containerInfo.Dir, created, containerImageConfig.Config.StopSignal) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, sb.trusted, containerInfo.Dir, created, containerImageConfig.Config.StopSignal) if err != nil { return nil, err } diff --git a/server/sandbox.go b/server/sandbox.go index 0f57f557..fdce6e99 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -143,6 +143,7 @@ type sandbox struct { shmPath string cgroupParent string privileged bool + trusted bool resolvPath string hostname string } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 1353a616..5b50670e 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -48,6 +48,19 @@ func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { return false } +// trustedSandbox returns true if the sandbox will run trusted workloads. 
+func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool { + kubeAnnotations := req.GetConfig().GetAnnotations() + + trustedAnnotation, ok := kubeAnnotations[annotations.TrustedSandbox] + if !ok { + // A sandbox is trusted by default. + return true + } + + return isTrue(trustedAnnotation) +} + func (s *Server) runContainer(container *oci.Container, cgroupParent string) error { if err := s.runtime.CreateContainer(container, cgroupParent); err != nil { return err @@ -277,6 +290,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } privileged := s.privilegedSandbox(req) + trusted := s.trustedSandbox(req) g.AddAnnotation(annotations.Metadata, string(metadataJSON)) g.AddAnnotation(annotations.Labels, string(labelsJSON)) g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) @@ -288,6 +302,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation(annotations.ContainerID, id) g.AddAnnotation(annotations.ShmPath, shmPath) g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged)) + g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted)) g.AddAnnotation(annotations.ResolvPath, resolvPath) g.AddAnnotation(annotations.HostName, hostname) g.AddAnnotation(annotations.KubeName, kubeName) @@ -313,6 +328,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest metadata: metadata, shmPath: shmPath, privileged: privileged, + trusted: trusted, resolvPath: resolvPath, hostname: hostname, } @@ -438,7 +454,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, podContainer.Dir, created, 
podContainer.Config.Config.StopSignal) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, sb.trusted, podContainer.Dir, created, podContainer.Config.Config.StopSignal) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index 93a81e26..7424bb59 100644 --- a/server/server.go +++ b/server/server.go @@ -152,7 +152,7 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, sb.trusted, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) if err != nil { return err } @@ -243,6 +243,7 @@ func (s *Server) loadSandbox(id string) error { } privileged := isTrue(m.Annotations[annotations.PrivilegedRuntime]) + trusted := isTrue(m.Annotations[annotations.TrustedSandbox]) sb := &sandbox{ id: id, @@ -257,6 +258,7 @@ func (s *Server) loadSandbox(id string) error { metadata: &metadata, shmPath: m.Annotations[annotations.ShmPath], privileged: privileged, + trusted: trusted, resolvPath: m.Annotations[annotations.ResolvPath], } @@ -308,7 +310,7 @@ func (s *Server) loadSandbox(id string) error { return err } - scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + scontainer, err := 
oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) if err != nil { return err } @@ -563,7 +565,7 @@ func New(config *Config) (*Server, error) { return nil, err } - r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager) + r, err := oci.New(config.Runtime, config.RuntimeUntrustedWorkload, config.DefaultWorkloadTrust, config.Conmon, config.ConmonEnv, config.CgroupManager) if err != nil { return nil, err }