oci: Support mixing trusted and untrusted workloads

Container runtimes provide different levels of isolation, from kernel namespaces to hardware virtualization. When starting a specific container, one may want to decide which level of isolation to use depending on how much we trust the container workload. Fully verified and signed containers may not need the hardware isolation layer but e.g. CI jobs pulling packages from many untrusted sources should probably not run only on a kernel namespace isolation layer. Here we allow CRI-O users to define a container runtime for trusted containers and another one for untrusted containers, and also to define a general, default trust level. This anticipates future kubelet implementations that would be able to tag containers as trusted or untrusted. When missing a kubelet hint, containers are trusted by default. A container becomes untrusted if we get a hint in that direction from kubelet or if the default trust level is set to "untrusted" and the container is not privileged. In both cases CRI-O will try to use the untrusted container runtime. For any other cases, it will switch to the trusted one. Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
2017-06-02 23:15:19 +02:00 · 2017-06-02 23:15:19 +02:00 · 0e51bbb778
commit 0e51bbb778
parent 7b9032bac7
9 changed files with 120 additions and 40 deletions
--- a/cmd/crio/config.go
+++ b/cmd/crio/config.go
@ -45,14 +45,33 @@ stream_port = "{{ .StreamPort }}"
 # runtime used and options for how to set up and manage the OCI runtime.
 [crio.runtime]
-# runtime is a path to the OCI runtime which crio will be using.
+# runtime is the OCI compatible runtime used for trusted container workloads.
 # This is a mandatory setting as this runtime will be the default one
 # and will also be used for untrusted container workloads if
 # runtime_untrusted_workload is not set.
 runtime = "{{ .Runtime }}"
-# runtime_host_privileged is a path to the OCI runtime which crio
+# runtime_untrusted_workload is the OCI compatible runtime used for untrusted
-# will be using for host privileged operations.
+# container workloads. This is an optional setting, except if
-# If this string is empty, crio will not try to use the "runtime"
+# default_workload_trust is set to "untrusted".
-# for all operations.
+runtime_untrusted_workload = "{{ .RuntimeUntrustedWorkload }}"
-runtime_host_privileged = "{{ .RuntimeHostPrivileged }}"
+
 # default_workload_trust is the default level of trust crio puts in container
 # workloads. It can either be "trusted" or "untrusted", and the default
 # is "trusted".
 # Containers can be run through different container runtimes, depending on
 # the trust hints we receive from kubelet:
 # - If kubelet tags a container workload as untrusted, crio will try first to
 # run it through the untrusted container workload runtime. If it is not set,
 # crio will use the trusted runtime.
 # - If kubelet does not provide any information about the container workload trust
 # level, the selected runtime will depend on the default_workload_trust setting.
 # If it is set to "untrusted", then all containers except for the host privileged
 # ones, will be run by the runtime_untrusted_workload runtime. Host privileged
 # containers are by definition trusted and will always use the trusted container
 # runtime. If default_workload_trust is set to "trusted", crio will use the trusted
 # container runtime for all containers.
 default_workload_trust = "{{ .DefaultWorkloadTrust }}"
 # conmon is the path to conmon binary, used for managing the runtime.
 conmon = "{{ .Conmon }}"
--- a/oci/container.go
+++ b/oci/container.go
@ -34,6 +34,7 @@ type Container struct {
 	stdin       bool
 	stdinOnce   bool
 	privileged  bool
 	trusted     bool
 	state       *ContainerState
 	metadata    *pb.ContainerMetadata
 	opLock      sync.Mutex
@ -56,7 +57,7 @@ type ContainerState struct {
 }
 // NewContainer creates a container object.
-func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, dir string, created time.Time, stopSignal string) (*Container, error) {
+func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) {
 	state := &ContainerState{}
 	state.Created = created
 	c := &Container{
@ -71,6 +72,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net
 		stdin:       stdin,
 		stdinOnce:   stdinOnce,
 		privileged:  privileged,
 		trusted:     trusted,
 		metadata:    metadata,
 		annotations: annotations,
 		image:       image,
--- a/oci/oci.go
+++ b/oci/oci.go
@ -31,26 +31,28 @@ const (
 )
 // New creates a new Runtime with options provided
-func New(runtimePath string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) {
+func New(runtimeTrustedPath string, runtimeUntrustedPath string, trustLevel string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) {
 	r := &Runtime{
-		name:           filepath.Base(runtimePath),
+		name:          filepath.Base(runtimeTrustedPath),
-		path:           runtimePath,
+		trustedPath:   runtimeTrustedPath,
-		privilegedPath: runtimeHostPrivilegedPath,
+		untrustedPath: runtimeUntrustedPath,
-		conmonPath:     conmonPath,
+		trustLevel:    trustLevel,
-		conmonEnv:      conmonEnv,
+		conmonPath:    conmonPath,
-		cgroupManager:  cgroupManager,
+		conmonEnv:     conmonEnv,
 		cgroupManager: cgroupManager,
 	}
 	return r, nil
 }
 // Runtime stores the information about an OCI runtime
 type Runtime struct {
-	name           string
+	name          string
-	path           string
+	trustedPath   string
-	privilegedPath string
+	untrustedPath string
-	conmonPath     string
+	trustLevel    string
-	conmonEnv      []string
+	conmonPath    string
-	cgroupManager  string
+	conmonEnv     []string
 	cgroupManager string
 }
 // syncInfo is used to return data from monitor process to daemon
@ -70,19 +72,41 @@ func (r *Runtime) Name() string {
 }
 // Path returns the full path to the OCI Runtime executable.
-// Depending if the container is privileged, it will return
+// Depending on whether the container is privileged and/or trusted,
-// the privileged runtime or not.
+// this will return either the trusted or untrusted runtime path.
 func (r *Runtime) Path(c *Container) string {
-	if c.privileged && r.privilegedPath != "" {
+	if !c.trusted {
-		return r.privilegedPath
+		// We have an explicitly untrusted container.
 		if c.privileged {
 			logrus.Warnf("Running an untrusted but privileged container")
 			return r.trustedPath
 		}
 		if r.untrustedPath != "" {
 			return r.untrustedPath
 		}
 		return r.trustedPath
 	}
-	return r.path
+	// Our container is trusted. Let's look at the configured trust level.
 	if r.trustLevel == "trusted" {
 		return r.trustedPath
 	}
 	// Our container is trusted, but we are running untrusted.
 	// We will use the untrusted container runtime if it's set
 	// and if it's not a privileged container.
 	if c.privileged || r.untrustedPath == "" {
 		return r.trustedPath
 	}
 	return r.untrustedPath
 }
 // Version returns the version of the OCI Runtime
 func (r *Runtime) Version() (string, error) {
-	runtimeVersion, err := getOCIVersion(r.path, "-v")
+	runtimeVersion, err := getOCIVersion(r.trustedPath, "-v")
 	if err != nil {
 		return "", err
 	}
--- a/pkg/annotations/annotations.go
+++ b/pkg/annotations/annotations.go
@ -52,6 +52,9 @@ const (
 	// ShmPath is the shared memory path annotation
 	ShmPath = "io.kubernetes.cri-o.ShmPath"
 	// TrustedSandbox is the annotation for trusted sandboxes
 	TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox"
 	// TTY is the terminal path annotation
 	TTY = "io.kubernetes.cri-o.TTY"
--- a/server/config.go
+++ b/server/config.go
@ -75,14 +75,33 @@ type APIConfig struct {
 // RuntimeConfig represents the "crio.runtime" TOML config table.
 type RuntimeConfig struct {
-	// Runtime is a path to the OCI runtime which crio will be using. Currently
+	// Runtime is the OCI compatible runtime used for trusted container workloads.
-	// the only known working choice is runC, simply because the OCI has not
+	// This is a mandatory setting as this runtime will be the default one and
-	// yet merged a CLI API (so we assume runC's API here).
+	// will also be used for untrusted container workloads if
 	// RuntimeUntrustedWorkload is not set.
 	Runtime string `toml:"runtime"`
-	// RuntimeHostPrivileged is a path to the OCI runtime which crio will be
+	// RuntimeUntrustedWorkload is the OCI compatible runtime used for untrusted
-	// using for host privileged operations.
+	// container workloads. This is an optional setting, except if
-	RuntimeHostPrivileged string `toml:"runtime_host_privileged"`
+	// DefaultWorkloadTrust is set to "untrusted".
 	RuntimeUntrustedWorkload string `toml:"runtime_untrusted_workload"`
 	// DefaultWorkloadTrust is the default level of trust crio puts in container
 	// workloads. This can either be "trusted" or "untrusted" and the default
 	// is "trusted".
 	// Containers can be run through different container runtimes, depending on
 	// the trust hints we receive from kubelet:
 	// - If kubelet tags a container workload as untrusted, crio will try first
 	// to run it through the untrusted container workload runtime. If it is not
 	// set, crio will use the trusted runtime.
 	// - If kubelet does not provide any information about the container workload trust
 	// level, the selected runtime will depend on the DefaultWorkloadTrust setting.
 	// If it is set to "untrusted", then all containers except for the host privileged
 	// ones, will be run by the RuntimeUntrustedWorkload runtime. Host privileged
 	// containers are by definition trusted and will always use the trusted container
 	// runtime. If DefaultWorkloadTrust is set to "trusted", crio will use the trusted
 	// container runtime for all containers.
 	DefaultWorkloadTrust string `toml:"default_workload_trust"`
 	// Conmon is the path to conmon binary, used for managing the runtime.
 	Conmon string `toml:"conmon"`
@ -218,9 +237,11 @@ func DefaultConfig() *Config {
 			StreamPort:    "10010",
 		},
 		RuntimeConfig: RuntimeConfig{
-			Runtime:               "/usr/bin/runc",
+			Runtime:                  "/usr/bin/runc",
-			RuntimeHostPrivileged: "",
+			RuntimeUntrustedWorkload: "",
-			Conmon:                conmonPath,
+			DefaultWorkloadTrust:     "trusted",
 			Conmon: conmonPath,
 			ConmonEnv: []string{
 				"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
 			},
--- a/server/container_create.go
+++ b/server/container_create.go
@ -673,7 +673,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
 		return nil, err
 	}
-	container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
+	container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, sb.trusted, containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
 	if err != nil {
 		return nil, err
 	}
--- a/server/sandbox.go
+++ b/server/sandbox.go
@ -143,6 +143,7 @@ type sandbox struct {
 	shmPath        string
 	cgroupParent   string
 	privileged     bool
 	trusted        bool
 	resolvPath     string
 	hostname       string
 }
--- a/server/sandbox_run.go
+++ b/server/sandbox_run.go
@ -48,6 +48,11 @@ func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
 	return false
 }
 // trustedSandbox returns true if the sandbox will run trusted workloads.
 func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool {
 	return true
 }
 func (s *Server) runContainer(container *oci.Container, cgroupParent string) error {
 	if err := s.runtime.CreateContainer(container, cgroupParent); err != nil {
 		return err
@ -277,6 +282,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
 	}
 	privileged := s.privilegedSandbox(req)
 	trusted := s.trustedSandbox(req)
 	g.AddAnnotation(annotations.Metadata, string(metadataJSON))
 	g.AddAnnotation(annotations.Labels, string(labelsJSON))
 	g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
@ -288,6 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
 	g.AddAnnotation(annotations.ContainerID, id)
 	g.AddAnnotation(annotations.ShmPath, shmPath)
 	g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged))
 	g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted))
 	g.AddAnnotation(annotations.ResolvPath, resolvPath)
 	g.AddAnnotation(annotations.HostName, hostname)
 	g.AddAnnotation(annotations.KubeName, kubeName)
@ -313,6 +320,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
 		metadata:     metadata,
 		shmPath:      shmPath,
 		privileged:   privileged,
 		trusted:      trusted,
 		resolvPath:   resolvPath,
 		hostname:     hostname,
 	}
@ -438,7 +446,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
 		return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err)
 	}
-	container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, podContainer.Dir, created, podContainer.Config.Config.StopSignal)
+	container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, sb.trusted, podContainer.Dir, created, podContainer.Config.Config.StopSignal)
 	if err != nil {
 		return nil, err
 	}
--- a/server/server.go
+++ b/server/server.go
@ -152,7 +152,7 @@ func (s *Server) loadContainer(id string) error {
 		return err
 	}
-	ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
+	ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, sb.trusted, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
 	if err != nil {
 		return err
 	}
@ -243,6 +243,7 @@ func (s *Server) loadSandbox(id string) error {
 	}
 	privileged := isTrue(m.Annotations[annotations.PrivilegedRuntime])
 	trusted := isTrue(m.Annotations[annotations.TrustedSandbox])
 	sb := &sandbox{
 		id:           id,
@ -257,6 +258,7 @@ func (s *Server) loadSandbox(id string) error {
 		metadata:     &metadata,
 		shmPath:      m.Annotations[annotations.ShmPath],
 		privileged:   privileged,
 		trusted:      trusted,
 		resolvPath:   m.Annotations[annotations.ResolvPath],
 	}
@ -308,7 +310,7 @@ func (s *Server) loadSandbox(id string) error {
 		return err
 	}
-	scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
+	scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
 	if err != nil {
 		return err
 	}
@ -563,7 +565,7 @@ func New(config *Config) (*Server, error) {
 		return nil, err
 	}
-	r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager)
+	r, err := oci.New(config.Runtime, config.RuntimeUntrustedWorkload, config.DefaultWorkloadTrust, config.Conmon, config.ConmonEnv, config.CgroupManager)
 	if err != nil {
 		return nil, err
 	}