oci: Support mixing trusted and untrusted workloads
Container runtimes provide different levels of isolation, from kernel namespaces to hardware virtualization. When starting a specific container, one may want to decide which level of isolation to use depending on how much we trust the container workload. Fully verified and signed containers may not need the hardware isolation layer but e.g. CI jobs pulling packages from many untrusted sources should probably not run only on a kernel namespace isolation layer. Here we allow CRI-O users to define a container runtime for trusted containers and another one for untrusted containers, and also to define a general, default trust level. This anticipates future kubelet implementations that would be able to tag containers as trusted or untrusted. When missing a kubelet hint, containers are trusted by default. A container becomes untrusted if we get a hint in that direction from kubelet or if the default trust level is set to "untrusted" and the container is not privileged. In both cases CRI-O will try to use the untrusted container runtime. For any other cases, it will switch to the trusted one. Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
parent
7b9032bac7
commit
0e51bbb778
9 changed files with 120 additions and 40 deletions
|
@ -45,14 +45,33 @@ stream_port = "{{ .StreamPort }}"
|
|||
# runtime used and options for how to set up and manage the OCI runtime.
|
||||
[crio.runtime]
|
||||
|
||||
# runtime is a path to the OCI runtime which crio will be using.
|
||||
# runtime is the OCI compatible runtime used for trusted container workloads.
|
||||
# This is a mandatory setting as this runtime will be the default one
|
||||
# and will also be used for untrusted container workloads if
|
||||
# runtime_untrusted_workload is not set.
|
||||
runtime = "{{ .Runtime }}"
|
||||
|
||||
# runtime_host_privileged is a path to the OCI runtime which crio
|
||||
# will be using for host privileged operations.
|
||||
# If this string is empty, crio will use the "runtime"
|
||||
# for all operations.
|
||||
runtime_host_privileged = "{{ .RuntimeHostPrivileged }}"
|
||||
# runtime_untrusted_workload is the OCI compatible runtime used for untrusted
|
||||
# container workloads. This is an optional setting, except if
|
||||
# default_workload_trust is set to "untrusted".
|
||||
runtime_untrusted_workload = "{{ .RuntimeUntrustedWorkload }}"
|
||||
|
||||
# default_workload_trust is the default level of trust crio puts in container
|
||||
# workloads. It can either be "trusted" or "untrusted", and the default
|
||||
# is "trusted".
|
||||
# Containers can be run through different container runtimes, depending on
|
||||
# the trust hints we receive from kubelet:
|
||||
# - If kubelet tags a container workload as untrusted, crio will try first to
|
||||
# run it through the untrusted container workload runtime. If it is not set,
|
||||
# crio will use the trusted runtime.
|
||||
# - If kubelet does not provide any information about the container workload trust
|
||||
# level, the selected runtime will depend on the default_workload_trust setting.
|
||||
# If it is set to "untrusted", then all containers except for the host privileged
|
||||
# ones, will be run by the runtime_untrusted_workload runtime. Host privileged
|
||||
# containers are by definition trusted and will always use the trusted container
|
||||
# runtime. If default_workload_trust is set to "trusted", crio will use the trusted
|
||||
# container runtime for all containers.
|
||||
default_workload_trust = "{{ .DefaultWorkloadTrust }}"
|
||||
|
||||
# conmon is the path to conmon binary, used for managing the runtime.
|
||||
conmon = "{{ .Conmon }}"
|
||||
|
|
|
@ -34,6 +34,7 @@ type Container struct {
|
|||
stdin bool
|
||||
stdinOnce bool
|
||||
privileged bool
|
||||
trusted bool
|
||||
state *ContainerState
|
||||
metadata *pb.ContainerMetadata
|
||||
opLock sync.Mutex
|
||||
|
@ -56,7 +57,7 @@ type ContainerState struct {
|
|||
}
|
||||
|
||||
// NewContainer creates a container object.
|
||||
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, dir string, created time.Time, stopSignal string) (*Container, error) {
|
||||
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, stdin bool, stdinOnce bool, privileged bool, trusted bool, dir string, created time.Time, stopSignal string) (*Container, error) {
|
||||
state := &ContainerState{}
|
||||
state.Created = created
|
||||
c := &Container{
|
||||
|
@ -71,6 +72,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net
|
|||
stdin: stdin,
|
||||
stdinOnce: stdinOnce,
|
||||
privileged: privileged,
|
||||
trusted: trusted,
|
||||
metadata: metadata,
|
||||
annotations: annotations,
|
||||
image: image,
|
||||
|
|
62
oci/oci.go
62
oci/oci.go
|
@ -31,26 +31,28 @@ const (
|
|||
)
|
||||
|
||||
// New creates a new Runtime with options provided
|
||||
func New(runtimePath string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) {
|
||||
func New(runtimeTrustedPath string, runtimeUntrustedPath string, trustLevel string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) {
|
||||
r := &Runtime{
|
||||
name: filepath.Base(runtimePath),
|
||||
path: runtimePath,
|
||||
privilegedPath: runtimeHostPrivilegedPath,
|
||||
conmonPath: conmonPath,
|
||||
conmonEnv: conmonEnv,
|
||||
cgroupManager: cgroupManager,
|
||||
name: filepath.Base(runtimeTrustedPath),
|
||||
trustedPath: runtimeTrustedPath,
|
||||
untrustedPath: runtimeUntrustedPath,
|
||||
trustLevel: trustLevel,
|
||||
conmonPath: conmonPath,
|
||||
conmonEnv: conmonEnv,
|
||||
cgroupManager: cgroupManager,
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Runtime stores the information about an OCI runtime.
|
||||
type Runtime struct {
|
||||
name string
|
||||
path string
|
||||
privilegedPath string
|
||||
conmonPath string
|
||||
conmonEnv []string
|
||||
cgroupManager string
|
||||
name string
|
||||
trustedPath string
|
||||
untrustedPath string
|
||||
trustLevel string
|
||||
conmonPath string
|
||||
conmonEnv []string
|
||||
cgroupManager string
|
||||
}
|
||||
|
||||
// syncInfo is used to return data from monitor process to daemon
|
||||
|
@ -70,19 +72,41 @@ func (r *Runtime) Name() string {
|
|||
}
|
||||
|
||||
// Path returns the full path to the OCI runtime executable.
|
||||
// Depending if the container is privileged, it will return
|
||||
// the privileged runtime or not.
|
||||
// Depending on whether the container is privileged and/or trusted,
|
||||
// this will return either the trusted or untrusted runtime path.
|
||||
func (r *Runtime) Path(c *Container) string {
|
||||
if c.privileged && r.privilegedPath != "" {
|
||||
return r.privilegedPath
|
||||
if !c.trusted {
|
||||
// We have an explicitly untrusted container.
|
||||
if c.privileged {
|
||||
logrus.Warnf("Running an untrusted but privileged container")
|
||||
return r.trustedPath
|
||||
}
|
||||
|
||||
if r.untrustedPath != "" {
|
||||
return r.untrustedPath
|
||||
}
|
||||
|
||||
return r.trustedPath
|
||||
}
|
||||
|
||||
return r.path
|
||||
// Our container is trusted. Let's look at the configured trust level.
|
||||
if r.trustLevel == "trusted" {
|
||||
return r.trustedPath
|
||||
}
|
||||
|
||||
// Our container is trusted, but the configured trust level is not "trusted".
|
||||
// We will use the untrusted container runtime if it's set
|
||||
// and if it's not a privileged container.
|
||||
if c.privileged || r.untrustedPath == "" {
|
||||
return r.trustedPath
|
||||
}
|
||||
|
||||
return r.untrustedPath
|
||||
}
|
||||
|
||||
// Version returns the version of the OCI Runtime
|
||||
func (r *Runtime) Version() (string, error) {
|
||||
runtimeVersion, err := getOCIVersion(r.path, "-v")
|
||||
runtimeVersion, err := getOCIVersion(r.trustedPath, "-v")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -52,6 +52,9 @@ const (
|
|||
// ShmPath is the shared memory path annotation
|
||||
ShmPath = "io.kubernetes.cri-o.ShmPath"
|
||||
|
||||
// TrustedSandbox is the annotation for trusted sandboxes
|
||||
TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox"
|
||||
|
||||
// TTY is the terminal path annotation
|
||||
TTY = "io.kubernetes.cri-o.TTY"
|
||||
|
||||
|
|
|
@ -75,14 +75,33 @@ type APIConfig struct {
|
|||
|
||||
// RuntimeConfig represents the "crio.runtime" TOML config table.
|
||||
type RuntimeConfig struct {
|
||||
// Runtime is a path to the OCI runtime which crio will be using. Currently
|
||||
// the only known working choice is runC, simply because the OCI has not
|
||||
// yet merged a CLI API (so we assume runC's API here).
|
||||
// Runtime is the OCI compatible runtime used for trusted container workloads.
|
||||
// This is a mandatory setting as this runtime will be the default one and
|
||||
// will also be used for untrusted container workloads if
|
||||
// RuntimeUntrustedWorkload is not set.
|
||||
Runtime string `toml:"runtime"`
|
||||
|
||||
// RuntimeHostPrivileged is a path to the OCI runtime which crio will be
|
||||
// using for host privileged operations.
|
||||
RuntimeHostPrivileged string `toml:"runtime_host_privileged"`
|
||||
// RuntimeUntrustedWorkload is the OCI compatible runtime used for untrusted
|
||||
// container workloads. This is an optional setting, except if
|
||||
// DefaultWorkloadTrust is set to "untrusted".
|
||||
RuntimeUntrustedWorkload string `toml:"runtime_untrusted_workload"`
|
||||
|
||||
// DefaultWorkloadTrust is the default level of trust crio puts in container
|
||||
// workloads. This can either be "trusted" or "untrusted" and the default
|
||||
// is "trusted".
|
||||
// Containers can be run through different container runtimes, depending on
|
||||
// the trust hints we receive from kubelet:
|
||||
// - If kubelet tags a container workload as untrusted, crio will try first
|
||||
// to run it through the untrusted container workload runtime. If it is not
|
||||
// set, crio will use the trusted runtime.
|
||||
// - If kubelet does not provide any information about the container workload trust
|
||||
// level, the selected runtime will depend on the DefaultWorkloadTrust setting.
|
||||
// If it is set to "untrusted", then all containers except for the host privileged
|
||||
// ones, will be run by the RuntimeUntrustedWorkload runtime. Host privileged
|
||||
// containers are by definition trusted and will always use the trusted container
|
||||
// runtime. If DefaultWorkloadTrust is set to "trusted", crio will use the trusted
|
||||
// container runtime for all containers.
|
||||
DefaultWorkloadTrust string `toml:"default_workload_trust"`
|
||||
|
||||
// Conmon is the path to conmon binary, used for managing the runtime.
|
||||
Conmon string `toml:"conmon"`
|
||||
|
@ -218,9 +237,11 @@ func DefaultConfig() *Config {
|
|||
StreamPort: "10010",
|
||||
},
|
||||
RuntimeConfig: RuntimeConfig{
|
||||
Runtime: "/usr/bin/runc",
|
||||
RuntimeHostPrivileged: "",
|
||||
Conmon: conmonPath,
|
||||
Runtime: "/usr/bin/runc",
|
||||
RuntimeUntrustedWorkload: "",
|
||||
DefaultWorkloadTrust: "trusted",
|
||||
|
||||
Conmon: conmonPath,
|
||||
ConmonEnv: []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
},
|
||||
|
|
|
@ -673,7 +673,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string,
|
|||
return nil, err
|
||||
}
|
||||
|
||||
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
|
||||
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, imageSpec, metadata, sb.id, containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.privileged, sb.trusted, containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -143,6 +143,7 @@ type sandbox struct {
|
|||
shmPath string
|
||||
cgroupParent string
|
||||
privileged bool
|
||||
trusted bool
|
||||
resolvPath string
|
||||
hostname string
|
||||
}
|
||||
|
|
|
@ -48,6 +48,11 @@ func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// trustedSandbox returns true if the sandbox will run trusted workloads.
|
||||
func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *Server) runContainer(container *oci.Container, cgroupParent string) error {
|
||||
if err := s.runtime.CreateContainer(container, cgroupParent); err != nil {
|
||||
return err
|
||||
|
@ -277,6 +282,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
|||
}
|
||||
|
||||
privileged := s.privilegedSandbox(req)
|
||||
trusted := s.trustedSandbox(req)
|
||||
g.AddAnnotation(annotations.Metadata, string(metadataJSON))
|
||||
g.AddAnnotation(annotations.Labels, string(labelsJSON))
|
||||
g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
|
||||
|
@ -288,6 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
|||
g.AddAnnotation(annotations.ContainerID, id)
|
||||
g.AddAnnotation(annotations.ShmPath, shmPath)
|
||||
g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged))
|
||||
g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted))
|
||||
g.AddAnnotation(annotations.ResolvPath, resolvPath)
|
||||
g.AddAnnotation(annotations.HostName, hostname)
|
||||
g.AddAnnotation(annotations.KubeName, kubeName)
|
||||
|
@ -313,6 +320,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
|||
metadata: metadata,
|
||||
shmPath: shmPath,
|
||||
privileged: privileged,
|
||||
trusted: trusted,
|
||||
resolvPath: resolvPath,
|
||||
hostname: hostname,
|
||||
}
|
||||
|
@ -438,7 +446,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
|||
return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err)
|
||||
}
|
||||
|
||||
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, podContainer.Dir, created, podContainer.Config.Config.StopSignal)
|
||||
container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, sb.privileged, sb.trusted, podContainer.Dir, created, podContainer.Config.Config.StopSignal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -152,7 +152,7 @@ func (s *Server) loadContainer(id string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
|
||||
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, img, &metadata, sb.id, tty, stdin, stdinOnce, sb.privileged, sb.trusted, containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -243,6 +243,7 @@ func (s *Server) loadSandbox(id string) error {
|
|||
}
|
||||
|
||||
privileged := isTrue(m.Annotations[annotations.PrivilegedRuntime])
|
||||
trusted := isTrue(m.Annotations[annotations.TrustedSandbox])
|
||||
|
||||
sb := &sandbox{
|
||||
id: id,
|
||||
|
@ -257,6 +258,7 @@ func (s *Server) loadSandbox(id string) error {
|
|||
metadata: &metadata,
|
||||
shmPath: m.Annotations[annotations.ShmPath],
|
||||
privileged: privileged,
|
||||
trusted: trusted,
|
||||
resolvPath: m.Annotations[annotations.ResolvPath],
|
||||
}
|
||||
|
||||
|
@ -308,7 +310,7 @@ func (s *Server) loadSandbox(id string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
|
||||
scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.netNs(), labels, kubeAnnotations, nil, nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -563,7 +565,7 @@ func New(config *Config) (*Server, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager)
|
||||
r, err := oci.New(config.Runtime, config.RuntimeUntrustedWorkload, config.DefaultWorkloadTrust, config.Conmon, config.ConmonEnv, config.CgroupManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue