From 0e51bbb7786bc602d1c779910c370edf3e7121ee Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 2 Jun 2017 23:15:19 +0200 Subject: [PATCH 1/2] oci: Support mixing trusted and untrusted workloads Container runtimes provide different levels of isolation, from kernel namespaces to hardware virtualization. When starting a specific container, one may want to decide which level of isolation to use depending on how much we trust the container workload. Fully verified and signed containers may not need the hardware isolation layer but e.g. CI jobs pulling packages from many untrusted sources should probably not run only on a kernel namespace isolation layer. Here we allow CRI-O users to define a container runtime for trusted containers and another one for untrusted containers, and also to define a general, default trust level. This anticipates future kubelet implementations that would be able to tag containers as trusted or untrusted. When missing a kubelet hint, containers are trusted by default. A container becomes untrusted if we get a hint in that direction from kubelet or if the default trust level is set to "untrusted" and the container is not privileged. In both cases CRI-O will try to use the untrusted container runtime. For any other cases, it will switch to the trusted one. Signed-off-by: Samuel Ortiz --- cmd/crio/config.go | 31 +++++++++++++---- oci/container.go | 4 ++- oci/oci.go | 62 +++++++++++++++++++++++----------- pkg/annotations/annotations.go | 3 ++ server/config.go | 39 ++++++++++++++++----- server/container_create.go | 2 +- server/sandbox.go | 1 + server/sandbox_run.go | 10 +++++- server/server.go | 8 +++-- 9 files changed, 120 insertions(+), 40 deletions(-) diff --git a/cmd/crio/config.go b/cmd/crio/config.go index e71969b5..840882d7 100644 --- a/cmd/crio/config.go +++ b/cmd/crio/config.go @@ -45,14 +45,33 @@ stream_port = "{{ .StreamPort }}" # runtime used and options for how to set up and manage the OCI runtime. 
[crio.runtime] -# runtime is a path to the OCI runtime which crio will be using. +# runtime is the OCI compatible runtime used for trusted container workloads. +# This is a mandatory setting as this runtime will be the default one +# and will also be used for untrusted container workloads if +# runtime_untrusted_workload is not set. runtime = "{{ .Runtime }}" -# runtime_host_privileged is a path to the OCI runtime which crio -# will be using for host privileged operations. -# If this string is empty, crio will not try to use the "runtime" -# for all operations. -runtime_host_privileged = "{{ .RuntimeHostPrivileged }}" +# runtime_untrusted_workload is the OCI compatible runtime used for untrusted +# container workloads. This is an optional setting, except if +# default_workload_trust is set to "untrusted". +runtime_untrusted_workload = "{{ .RuntimeUntrustedWorkload }}" + +# default_workload_trust is the default level of trust crio puts in container +# workloads. It can either be "trusted" or "untrusted", and the default +# is "trusted". +# Containers can be run through different container runtimes, depending on +# the trust hints we receive from kubelet: +# - If kubelet tags a container workload as untrusted, crio will try first to +# run it through the untrusted container workload runtime. If it is not set, +# crio will use the trusted runtime. +# - If kubelet does not provide any information about the container workload trust +# level, the selected runtime will depend on the default_workload_trust setting. +# If it is set to "untrusted", then all containers, except for the host privileged +# ones, will be run by the runtime_untrusted_workload runtime. Host privileged +# containers are by definition trusted and will always use the trusted container +# runtime. If default_workload_trust is set to "trusted", crio will use the trusted +# container runtime for all containers. 
+default_workload_trust = "{{ .DefaultWorkloadTrust }}" # conmon is the path to conmon binary, used for managing the runtime. conmon = "{{ .Conmon }}" diff --git a/oci/container.go b/oci/container.go index 6b99e487..5cedd833 100644 --- a/oci/container.go +++ b/oci/container.go @@ -34,6 +34,7 @@ type Container struct { stdin bool stdinOnce bool privileged bool + trusted bool state *ContainerState metadata *pb.ContainerMetadata opLock sync.Mutex @@ -56,7 +57,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, dir string, created time.Time, stopSignal string) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) { state := &ContainerState{} state.Created = created c := &Container{ @@ -71,6 +72,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net stdin: stdin, stdinOnce: stdinOnce, privileged: privileged, + trusted: trusted, metadata: metadata, annotations: annotations, image: image, diff --git a/oci/oci.go b/oci/oci.go index 8b33141c..d965f6d6 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -31,26 +31,28 @@ const ( ) // New creates a new Runtime with options provided -func New(runtimePath string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { +func New(runtimeTrustedPath string, runtimeUntrustedPath string, trustLevel string, conmonPath string, 
conmonEnv []string, cgroupManager string) (*Runtime, error) { r := &Runtime{ - name: filepath.Base(runtimePath), - path: runtimePath, - privilegedPath: runtimeHostPrivilegedPath, - conmonPath: conmonPath, - conmonEnv: conmonEnv, - cgroupManager: cgroupManager, + name: filepath.Base(runtimeTrustedPath), + trustedPath: runtimeTrustedPath, + untrustedPath: runtimeUntrustedPath, + trustLevel: trustLevel, + conmonPath: conmonPath, + conmonEnv: conmonEnv, + cgroupManager: cgroupManager, } return r, nil } // Runtime stores the information about a oci runtime type Runtime struct { - name string - path string - privilegedPath string - conmonPath string - conmonEnv []string - cgroupManager string + name string + trustedPath string + untrustedPath string + trustLevel string + conmonPath string + conmonEnv []string + cgroupManager string } // syncInfo is used to return data from monitor process to daemon @@ -70,19 +72,41 @@ func (r *Runtime) Name() string { } // Path returns the full path the OCI Runtime executable. -// Depending if the container is privileged, it will return -// the privileged runtime or not. +// Depending if the container is privileged and/or trusted, +// this will return either the trusted or untrusted runtime path. func (r *Runtime) Path(c *Container) string { - if c.privileged && r.privilegedPath != "" { - return r.privilegedPath + if !c.trusted { + // We have an explicitly untrusted container. + if c.privileged { + logrus.Warnf("Running an untrusted but privileged container") + return r.trustedPath + } + + if r.untrustedPath != "" { + return r.untrustedPath + } + + return r.trustedPath } - return r.path + // Our container is trusted. Let's look at the configured trust level. + if r.trustLevel == "trusted" { + return r.trustedPath + } + + // Our container is trusted, but we are running untrusted. + // We will use the untrusted container runtime if it's set + // and if it's not a privileged container. 
+ if c.privileged || r.untrustedPath == "" { + return r.trustedPath + } + + return r.untrustedPath } // Version returns the version of the OCI Runtime func (r *Runtime) Version() (string, error) { - runtimeVersion, err := getOCIVersion(r.path, "-v") + runtimeVersion, err := getOCIVersion(r.trustedPath, "-v") if err != nil { return "", err } diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index 40916a25..26ead571 100644 --- a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -52,6 +52,9 @@ const ( // ShmPath is the shared memory path annotation ShmPath = "io.kubernetes.cri-o.ShmPath" + // TrustedSandbox is the annotation for trusted sandboxes + TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox" + // TTY is the terminal path annotation TTY = "io.kubernetes.cri-o.TTY" diff --git a/server/config.go b/server/config.go index 99cac43e..32a9a2a3 100644 --- a/server/config.go +++ b/server/config.go @@ -75,14 +75,33 @@ type APIConfig struct { // RuntimeConfig represents the "crio.runtime" TOML config table. type RuntimeConfig struct { - // Runtime is a path to the OCI runtime which crio will be using. Currently - // the only known working choice is runC, simply because the OCI has not - // yet merged a CLI API (so we assume runC's API here). + // Runtime is the OCI compatible runtime used for trusted container workloads. + // This is a mandatory setting as this runtime will be the default one and + // will also be used for untrusted container workloads if + // RuntimeUntrustedWorkload is not set. Runtime string `toml:"runtime"` - // RuntimeHostPrivileged is a path to the OCI runtime which crio will be - // using for host privileged operations. - RuntimeHostPrivileged string `toml:"runtime_host_privileged"` + // RuntimeUntrustedWorkload is the OCI compatible runtime used for untrusted + // container workloads. This is an optional setting, except if + // DefaultWorkloadTrust is set to "untrusted". 
+ RuntimeUntrustedWorkload string `toml:"runtime_untrusted_workload"` + + // DefaultWorkloadTrust is the default level of trust crio puts in container + // workloads. This can either be "trusted" or "untrusted" and the default + // is "trusted". + // Containers can be run through different container runtimes, depending on + // the trust hints we receive from kubelet: + // - If kubelet tags a container workload as untrusted, crio will try first + // to run it through the untrusted container workload runtime. If it is not + // set, crio will use the trusted runtime. + // - If kubelet does not provide any information about the container workload trust + // level, the selected runtime will depend on the DefaultWorkloadTrust setting. + // If it is set to "untrusted", then all containers, except for the host privileged + // ones, will be run by the RuntimeUntrustedWorkload runtime. Host privileged + // containers are by definition trusted and will always use the trusted container + // runtime. If DefaultWorkloadTrust is set to "trusted", crio will use the trusted + // container runtime for all containers. + DefaultWorkloadTrust string `toml:"default_workload_trust"` // Conmon is the path to conmon binary, used for managing the runtime. 
Conmon string `toml:"conmon"` @@ -218,9 +237,11 @@ func DefaultConfig() *Config { StreamPort: "10010", }, RuntimeConfig: RuntimeConfig{ - Runtime: "/usr/bin/runc", - RuntimeHostPrivileged: "", - Conmon: conmonPath, + Runtime: "/usr/bin/runc", + RuntimeUntrustedWorkload: "", + DefaultWorkloadTrust: "trusted", + + Conmon: conmonPath, ConmonEnv: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, diff --git a/server/container_create.go b/server/container_create.go index 57624d08..c7ee28b8 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -673,7 +673,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, containerInfo.Dir, created, containerImageConfig.Config.StopSignal) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, sb.trusted, containerInfo.Dir, created, containerImageConfig.Config.StopSignal) if err != nil { return nil, err } diff --git a/server/sandbox.go b/server/sandbox.go index 0f57f557..fdce6e99 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -143,6 +143,7 @@ type sandbox struct { shmPath string cgroupParent string privileged bool + trusted bool resolvPath string hostname string } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 1353a616..66cf155b 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -48,6 +48,11 @@ func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { return false } +// trustedSandbox returns true if the sandbox will run trusted workloads. 
+func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool { + return true +} + func (s *Server) runContainer(container *oci.Container, cgroupParent string) error { if err := s.runtime.CreateContainer(container, cgroupParent); err != nil { return err @@ -277,6 +282,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } privileged := s.privilegedSandbox(req) + trusted := s.trustedSandbox(req) g.AddAnnotation(annotations.Metadata, string(metadataJSON)) g.AddAnnotation(annotations.Labels, string(labelsJSON)) g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) @@ -288,6 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation(annotations.ContainerID, id) g.AddAnnotation(annotations.ShmPath, shmPath) g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged)) + g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted)) g.AddAnnotation(annotations.ResolvPath, resolvPath) g.AddAnnotation(annotations.HostName, hostname) g.AddAnnotation(annotations.KubeName, kubeName) @@ -313,6 +320,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest metadata: metadata, shmPath: shmPath, privileged: privileged, + trusted: trusted, resolvPath: resolvPath, hostname: hostname, } @@ -438,7 +446,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, podContainer.Dir, created, podContainer.Config.Config.StopSignal) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, sb.trusted, podContainer.Dir, 
created, podContainer.Config.Config.StopSignal) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index 93a81e26..7424bb59 100644 --- a/server/server.go +++ b/server/server.go @@ -152,7 +152,7 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, sb.trusted, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) if err != nil { return err } @@ -243,6 +243,7 @@ func (s *Server) loadSandbox(id string) error { } privileged := isTrue(m.Annotations[annotations.PrivilegedRuntime]) + trusted := isTrue(m.Annotations[annotations.TrustedSandbox]) sb := &sandbox{ id: id, @@ -257,6 +258,7 @@ func (s *Server) loadSandbox(id string) error { metadata: &metadata, shmPath: m.Annotations[annotations.ShmPath], privileged: privileged, + trusted: trusted, resolvPath: m.Annotations[annotations.ResolvPath], } @@ -308,7 +310,7 @@ func (s *Server) loadSandbox(id string) error { return err } - scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) 
if err != nil { return err } @@ -563,7 +565,7 @@ func New(config *Config) (*Server, error) { return nil, err } - r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager) + r, err := oci.New(config.Runtime, config.RuntimeUntrustedWorkload, config.DefaultWorkloadTrust, config.Conmon, config.ConmonEnv, config.CgroupManager) if err != nil { return nil, err } From 4462480e54cf1666be7fae91937fcaa3757dca66 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 8 Jun 2017 14:03:24 +0200 Subject: [PATCH 2/2] sandbox: Check for trusted annotations If we get a kubelet annotation about the sandbox trust level, we use it to toggle our sandbox trust flag. Signed-off-by: Samuel Ortiz --- server/sandbox_run.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 66cf155b..5b50670e 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -50,7 +50,15 @@ func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { // trustedSandbox returns true if the sandbox will run trusted workloads. func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool { - return true + kubeAnnotations := req.GetConfig().GetAnnotations() + + trustedAnnotation, ok := kubeAnnotations[annotations.TrustedSandbox] + if !ok { + // A sandbox is trusted by default. + return true + } + + return isTrue(trustedAnnotation) } func (s *Server) runContainer(container *oci.Container, cgroupParent string) error {