diff --git a/cmd/ocid/config.go b/cmd/ocid/config.go index db1ccc15..982732b3 100644 --- a/cmd/ocid/config.go +++ b/cmd/ocid/config.go @@ -42,6 +42,12 @@ listen = "{{ .Listen }}" # runtime is a path to the OCI runtime which ocid will be using. runtime = "{{ .Runtime }}" +# runtime_host_privileged is a path to the OCI runtime which ocid +# will be using for host privileged operations. +# If this string is empty, ocid will not try to use the "runtime" +# for all operations. +runtime_host_privileged = "{{ .RuntimeHostPrivileged }}" + # conmon is the path to conmon binary, used for managing the runtime. conmon = "{{ .Conmon }}" diff --git a/oci/oci.go b/oci/oci.go index 6c3ac4ff..83f8a72f 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -34,24 +34,26 @@ const ( ) // New creates a new Runtime with options provided -func New(runtimePath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { +func New(runtimePath string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { r := &Runtime{ - name: filepath.Base(runtimePath), - path: runtimePath, - conmonPath: conmonPath, - conmonEnv: conmonEnv, - cgroupManager: cgroupManager, + name: filepath.Base(runtimePath), + path: runtimePath, + privilegedPath: runtimeHostPrivilegedPath, + conmonPath: conmonPath, + conmonEnv: conmonEnv, + cgroupManager: cgroupManager, } return r, nil } // Runtime stores the information about a oci runtime type Runtime struct { - name string - path string - conmonPath string - conmonEnv []string - cgroupManager string + name string + path string + privilegedPath string + conmonPath string + conmonEnv []string + cgroupManager string } // syncInfo is used to return data from monitor process to daemon @@ -69,8 +71,14 @@ func (r *Runtime) Name() string { return r.name } -// Path returns the full path the OCI Runtime executable -func (r *Runtime) Path() string { +// Path returns the full path the OCI Runtime executable. +// Depending if the container is privileged, it will return +// the privileged runtime or not. +func (r *Runtime) Path(c *Container) string { + if c.privileged && r.privilegedPath != "" { + return r.privilegedPath + } + return r.path } @@ -107,7 +115,7 @@ func (r *Runtime) CreateContainer(c *Container) error { args = append(args, "-s") } args = append(args, "-c", c.name) - args = append(args, "-r", r.path) + args = append(args, "-r", r.Path(c)) args = append(args, "-b", c.bundlePath) args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile")) if c.terminal { @@ -149,7 +157,7 @@ func (r *Runtime) CreateContainer(c *Container) error { func (r *Runtime) StartContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", c.name); err != nil { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "start", c.name); err != nil { return err } c.state.Started = time.Now() @@ -209,7 +217,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp var args []string args = append(args, "-c", c.name) - args = append(args, "-r", r.path) + args = append(args, "-r", r.Path(c)) args = append(args, "-p", pidFile.Name()) args = append(args, "-e") if c.terminal { @@ -341,7 +349,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp func (r *Runtime) StopContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", c.name, "TERM"); err != nil { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.name, "TERM"); err != nil { return err } i := 0 @@ -369,14 +377,14 @@ func (r *Runtime) StopContainer(c *Container) error { func (r *Runtime) DeleteContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "delete", c.name) + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "delete", c.name) } // UpdateStatus refreshes the status of the container. func (r *Runtime) UpdateStatus(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - out, err := exec.Command(r.path, "state", c.name).CombinedOutput() + out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput() if err != nil { return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out) } @@ -426,6 +434,7 @@ type Container struct { sandbox string netns ns.NetNS terminal bool + privileged bool state *ContainerState metadata *pb.ContainerMetadata opLock sync.Mutex @@ -441,7 +450,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool) (*Container, error) { c := &Container{ id: id, name: name, @@ -451,6 +460,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net sandbox: sandbox, netns: netns, terminal: terminal, + privileged: privileged, metadata: metadata, annotations: annotations, image: image, diff --git a/server/config.go b/server/config.go index 56fcdda5..335b110e 100644 --- a/server/config.go +++ b/server/config.go @@ -76,6 +76,10 @@ type RuntimeConfig struct { // yet merged a CLI API (so we assume runC's API here). Runtime string `toml:"runtime"` + // RuntimeHostPrivileged is a path to the OCI runtime which ocid will be + // using for host privileged operations. + RuntimeHostPrivileged string `toml:"runtime_host_privileged"` + // Conmon is the path to conmon binary, used for managing the runtime. Conmon string `toml:"conmon"` @@ -205,8 +209,9 @@ func DefaultConfig() *Config { Listen: "/var/run/ocid.sock", }, RuntimeConfig: RuntimeConfig{ - Runtime: "/usr/bin/runc", - Conmon: conmonPath, + Runtime: "/usr/bin/runc", + RuntimeHostPrivileged: "", + Conmon: conmonPath, ConmonEnv: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, diff --git a/server/container_create.go b/server/container_create.go index 8a98115d..af31c364 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -25,6 +25,37 @@ const ( seccompLocalhostPrefix = "localhost/" ) +func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { + mounts := containerConfig.GetMounts() + for _, mount := range mounts { + dest := mount.ContainerPath + if dest == "" { + return fmt.Errorf("Mount.ContainerPath is empty") + } + + src := mount.HostPath + if src == "" { + return fmt.Errorf("Mount.HostPath is empty") + } + + options := []string{"rw"} + if mount.Readonly { + options = []string{"ro"} + } + + if mount.SelinuxRelabel { + // Need a way in kubernetes to determine if the volume is shared or private + if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { + return fmt.Errorf("relabel failed %s: %v", src, err) + } + } + + specgen.AddBindMount(src, dest, options) + } + + return nil +} + // CreateContainer creates a new container in specified PodSandbox func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { logrus.Debugf("CreateContainerRequest %+v", req) @@ -145,31 +176,8 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } } - mounts := containerConfig.GetMounts() - for _, mount := range mounts { - dest := mount.ContainerPath - if dest == "" { - return nil, fmt.Errorf("Mount.ContainerPath is empty") - } - - src := mount.HostPath - if src == "" { - return nil, fmt.Errorf("Mount.HostPath is empty") - } - - options := []string{"rw"} - if mount.Readonly { - options = []string{"ro"} - } - - if mount.SelinuxRelabel { - // Need a way in kubernetes to determine if the volume is shared or private - if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { - return nil, fmt.Errorf("relabel failed %s: %v", src, err) - } - } - - specgen.AddBindMount(src, dest, options) + if err := addOciBindMounts(sb, containerConfig, &specgen); err != nil { + return nil, err } labels := containerConfig.GetLabels() @@ -384,7 +392,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged) if err != nil { return nil, err } diff --git a/server/sandbox.go b/server/sandbox.go index 06edaded..65868a9f 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -139,6 +139,7 @@ type sandbox struct { metadata *pb.PodSandboxMetadata shmPath string cgroupParent string + privileged bool } const ( diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 99923e52..7a1882b0 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -17,6 +17,32 @@ import ( pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) +// privilegedSandbox returns true if the sandbox configuration +// requires additional host privileges for the sandbox. +func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { + securityContext := req.GetConfig().GetLinux().GetSecurityContext() + if securityContext == nil { + return false + } + + if securityContext.Privileged { + return true + } + + namespaceOptions := securityContext.GetNamespaceOptions() + if namespaceOptions == nil { + return false + } + + if namespaceOptions.HostNetwork || + namespaceOptions.HostPid || + namespaceOptions.HostIpc { + return true + } + + return false +} + func (s *Server) runContainer(container *oci.Container) error { if err := s.runtime.CreateContainer(container); err != nil { return err @@ -218,6 +244,8 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } }() + privileged := s.privilegedSandbox(req) + g.AddAnnotation("ocid/metadata", string(metadataJSON)) g.AddAnnotation("ocid/labels", string(labelsJSON)) g.AddAnnotation("ocid/annotations", string(annotationsJSON)) @@ -228,6 +256,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation("ocid/container_name", containerName) g.AddAnnotation("ocid/container_id", id) g.AddAnnotation("ocid/shm_path", shmPath) + g.AddAnnotation("ocid/privileged_runtime", fmt.Sprintf("%v", privileged)) sb := &sandbox{ id: id, @@ -240,6 +269,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest mountLabel: mountLabel, metadata: metadata, shmPath: shmPath, + privileged: privileged, } s.addSandbox(sb) @@ -344,7 +374,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index f1d17450..be7307a0 100644 --- a/server/server.go +++ b/server/server.go @@ -105,7 +105,7 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged) if err != nil { return err } @@ -173,6 +173,8 @@ func (s *Server) loadSandbox(id string) error { return err } + privileged := m.Annotations["ocid/privileged_runtime"] == "true" + sb := &sandbox{ id: id, name: name, @@ -184,6 +186,7 @@ func (s *Server) loadSandbox(id string) error { annotations: annotations, metadata: &metadata, shmPath: m.Annotations["ocid/shm_path"], + privileged: privileged, } // We add a netNS only if we can load a permanent one. @@ -223,7 +226,8 @@ func (s *Server) loadSandbox(id string) error { s.releaseContainerName(cname) } }() - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false) + + scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false, privileged) if err != nil { return err } @@ -452,7 +456,7 @@ func New(config *Config) (*Server, error) { return nil, err } - r, err := oci.New(config.Runtime, config.Conmon, config.ConmonEnv, config.CgroupManager) + r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager) if err != nil { return nil, err }