From 2fc4d0cac101348d00b425ebd2857945690e2ea0 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 21 Feb 2017 00:33:01 +0100 Subject: [PATCH 1/4] config: Add host privileged runtime configuration Not all runtimes are able to handle some of the kubelet security context options, in particular the ones granting host privileges to containers. By adding a host privileged runtime path configuration, we allow ocid to use a different runtime for host privileged operations like e.g. host namespaces access. Signed-off-by: Samuel Ortiz --- cmd/ocid/config.go | 6 ++++++ server/config.go | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmd/ocid/config.go b/cmd/ocid/config.go index db1ccc15..982732b3 100644 --- a/cmd/ocid/config.go +++ b/cmd/ocid/config.go @@ -42,6 +42,12 @@ listen = "{{ .Listen }}" # runtime is a path to the OCI runtime which ocid will be using. runtime = "{{ .Runtime }}" +# runtime_host_privileged is a path to the OCI runtime which ocid +# will be using for host privileged operations. +# If this string is empty, ocid will not try to use the "runtime" +# for all operations. +runtime_host_privileged = "{{ .RuntimeHostPrivileged }}" + # conmon is the path to conmon binary, used for managing the runtime. conmon = "{{ .Conmon }}" diff --git a/server/config.go b/server/config.go index 56fcdda5..335b110e 100644 --- a/server/config.go +++ b/server/config.go @@ -76,6 +76,10 @@ type RuntimeConfig struct { // yet merged a CLI API (so we assume runC's API here). Runtime string `toml:"runtime"` + // RuntimeHostPrivileged is a path to the OCI runtime which ocid will be + // using for host privileged operations. + RuntimeHostPrivileged string `toml:"runtime_host_privileged"` + // Conmon is the path to conmon binary, used for managing the runtime. Conmon string `toml:"conmon"` @@ -205,8 +209,9 @@ func DefaultConfig() *Config { Listen: "/var/run/ocid.sock", }, RuntimeConfig: RuntimeConfig{ - Runtime: "/usr/bin/runc", - Conmon: conmonPath, + Runtime: "/usr/bin/runc", + RuntimeHostPrivileged: "", + Conmon: conmonPath, ConmonEnv: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, From eab6b00ea6e0c882b9a8eb28274a826d957a1a80 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 21 Feb 2017 11:51:16 +0100 Subject: [PATCH 2/4] oci: Support for the host privileged runtime path We add a privileged flag to the container and sandbox structures and can now select the appropriate runtime path for any container operations depending on that flag. Here again, the default runtime will be used for non privileged containers and for privileged ones in case there are no privileged runtime defined. Signed-off-by: Samuel Ortiz --- oci/oci.go | 47 ++++++++++++++++++++++++++++------------------- server/sandbox.go | 1 + server/server.go | 2 +- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/oci/oci.go b/oci/oci.go index 6c3ac4ff..e6e4a158 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -34,24 +34,26 @@ const ( ) // New creates a new Runtime with options provided -func New(runtimePath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { +func New(runtimePath string, runtimeHostPrivilegedPath string, conmonPath string, conmonEnv []string, cgroupManager string) (*Runtime, error) { r := &Runtime{ - name: filepath.Base(runtimePath), - path: runtimePath, - conmonPath: conmonPath, - conmonEnv: conmonEnv, - cgroupManager: cgroupManager, + name: filepath.Base(runtimePath), + path: runtimePath, + privilegedPath: runtimeHostPrivilegedPath, + conmonPath: conmonPath, + conmonEnv: conmonEnv, + cgroupManager: cgroupManager, } return r, nil } // Runtime stores the information about a oci runtime type Runtime struct { - name string - path string - conmonPath string - conmonEnv []string - cgroupManager string + name string + path string + privilegedPath string + conmonPath string + conmonEnv []string + cgroupManager string } // syncInfo is used to return data from monitor process to daemon @@ -69,8 +71,14 @@ func (r *Runtime) Name() string { return r.name } -// Path returns the full path the OCI Runtime executable -func (r *Runtime) Path() string { +// Path returns the full path the OCI Runtime executable. +// Depending if the container is privileged, it will return +// the privileged runtime or not. +func (r *Runtime) Path(c *Container) string { + if c.privileged && r.privilegedPath != "" { + return r.privilegedPath + } + return r.path } @@ -107,7 +115,7 @@ func (r *Runtime) CreateContainer(c *Container) error { args = append(args, "-s") } args = append(args, "-c", c.name) - args = append(args, "-r", r.path) + args = append(args, "-r", r.Path(c)) args = append(args, "-b", c.bundlePath) args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile")) if c.terminal { @@ -149,7 +157,7 @@ func (r *Runtime) CreateContainer(c *Container) error { func (r *Runtime) StartContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", c.name); err != nil { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "start", c.name); err != nil { return err } c.state.Started = time.Now() @@ -209,7 +217,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp var args []string args = append(args, "-c", c.name) - args = append(args, "-r", r.path) + args = append(args, "-r", r.Path(c)) args = append(args, "-p", pidFile.Name()) args = append(args, "-e") if c.terminal { @@ -341,7 +349,7 @@ func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp func (r *Runtime) StopContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", c.name, "TERM"); err != nil { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.name, "TERM"); err != nil { return err } i := 0 @@ -369,14 +377,14 @@ func (r *Runtime) StopContainer(c *Container) error { func (r *Runtime) DeleteContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "delete", c.name) + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "delete", c.name) } // UpdateStatus refreshes the status of the container. func (r *Runtime) UpdateStatus(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() - out, err := exec.Command(r.path, "state", c.name).CombinedOutput() + out, err := exec.Command(r.Path(c), "state", c.name).CombinedOutput() if err != nil { return fmt.Errorf("error getting container state for %s: %s: %q", c.name, err, out) } @@ -426,6 +434,7 @@ type Container struct { sandbox string netns ns.NetNS terminal bool + privileged bool state *ContainerState metadata *pb.ContainerMetadata opLock sync.Mutex diff --git a/server/sandbox.go b/server/sandbox.go index 06edaded..65868a9f 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -139,6 +139,7 @@ type sandbox struct { metadata *pb.PodSandboxMetadata shmPath string cgroupParent string + privileged bool } const ( diff --git a/server/server.go b/server/server.go index f1d17450..0856b931 100644 --- a/server/server.go +++ b/server/server.go @@ -452,7 +452,7 @@ func New(config *Config) (*Server, error) { return nil, err } - r, err := oci.New(config.Runtime, config.Conmon, config.ConmonEnv, config.CgroupManager) + r, err := oci.New(config.Runtime, config.RuntimeHostPrivileged, config.Conmon, config.ConmonEnv, config.CgroupManager) if err != nil { return nil, err } From 2ec696be41212ebf975c9b092b771d318e0f98a2 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 21 Feb 2017 18:19:06 +0100 Subject: [PATCH 3/4] server: Set sandbox and container privileged flags The sandbox privileged flag is set to true only if either the pod configuration privileged flag is set to true or when any of the pod namespaces are the host ones. A container inherit its privileged flag from its sandbox, and will be run by the privileged runtime only if it's set to true. In other words, the privileged runtime (when defined) will be when one of the below conditions is true: - The sandbox will be asked to run at least one privileged container. - The sandbox requires access to either the host IPC or networking namespaces. Signed-off-by: Samuel Ortiz --- oci/oci.go | 3 ++- server/container_create.go | 2 +- server/sandbox_run.go | 32 +++++++++++++++++++++++++++++++- server/server.go | 8 ++++++-- 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/oci/oci.go b/oci/oci.go index e6e4a158..83f8a72f 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -450,7 +450,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool, privileged bool) (*Container, error) { c := &Container{ id: id, name: name, @@ -460,6 +460,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, net sandbox: sandbox, netns: netns, terminal: terminal, + privileged: privileged, metadata: metadata, annotations: annotations, image: image, diff --git a/server/container_create.go b/server/container_create.go index 8a98115d..857adb8b 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -384,7 +384,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty) + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.Tty, sb.privileged) if err != nil { return nil, err } diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 7cff2f3e..2dfe012f 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -17,6 +17,32 @@ import ( pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) +// privilegedSandbox returns true if the sandbox configuration +// requires additional host privileges for the sandbox. +func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { + securityContext := req.GetConfig().GetLinux().GetSecurityContext() + if securityContext == nil { + return false + } + + if securityContext.Privileged { + return true + } + + namespaceOptions := securityContext.GetNamespaceOptions() + if namespaceOptions == nil { + return false + } + + if namespaceOptions.HostNetwork || + namespaceOptions.HostPid || + namespaceOptions.HostIpc { + return true + } + + return false +} + func (s *Server) runContainer(container *oci.Container) error { if err := s.runtime.CreateContainer(container); err != nil { return err @@ -218,6 +244,8 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } }() + privileged := s.privilegedSandbox(req) + g.AddAnnotation("ocid/metadata", string(metadataJSON)) g.AddAnnotation("ocid/labels", string(labelsJSON)) g.AddAnnotation("ocid/annotations", string(annotationsJSON)) @@ -228,6 +256,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation("ocid/container_name", containerName) g.AddAnnotation("ocid/container_id", id) g.AddAnnotation("ocid/shm_path", shmPath) + g.AddAnnotation("ocid/privileged_runtime", fmt.Sprintf("%v", privileged)) sb := &sandbox{ id: id, @@ -240,6 +269,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest mountLabel: mountLabel, metadata: metadata, shmPath: shmPath, + privileged: privileged, } s.addSandbox(sb) @@ -344,7 +374,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.name, id, err) } - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false) + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logDir, sb.netNs(), labels, annotations, nil, nil, id, false, sb.privileged) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index 0856b931..be7307a0 100644 --- a/server/server.go +++ b/server/server.go @@ -105,7 +105,7 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty, sb.privileged) if err != nil { return err } @@ -173,6 +173,8 @@ func (s *Server) loadSandbox(id string) error { return err } + privileged := m.Annotations["ocid/privileged_runtime"] == "true" + sb := &sandbox{ id: id, name: name, @@ -184,6 +186,7 @@ func (s *Server) loadSandbox(id string) error { annotations: annotations, metadata: &metadata, shmPath: m.Annotations["ocid/shm_path"], + privileged: privileged, } // We add a netNS only if we can load a permanent one. @@ -223,7 +226,8 @@ func (s *Server) loadSandbox(id string) error { s.releaseContainerName(cname) } }() - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false) + + scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false, privileged) if err != nil { return err } From f7eee71792b379c58505e0605b965dd55efe5dbd Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 22 Feb 2017 19:42:44 +0100 Subject: [PATCH 4/4] server: Reduce createSandboxContainer complexity By factorizing the bind mounts generation code. Signed-off-by: Samuel Ortiz --- server/container_create.go | 58 ++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/server/container_create.go b/server/container_create.go index 857adb8b..af31c364 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -25,6 +25,37 @@ const ( seccompLocalhostPrefix = "localhost/" ) +func addOciBindMounts(sb *sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { + mounts := containerConfig.GetMounts() + for _, mount := range mounts { + dest := mount.ContainerPath + if dest == "" { + return fmt.Errorf("Mount.ContainerPath is empty") + } + + src := mount.HostPath + if src == "" { + return fmt.Errorf("Mount.HostPath is empty") + } + + options := []string{"rw"} + if mount.Readonly { + options = []string{"ro"} + } + + if mount.SelinuxRelabel { + // Need a way in kubernetes to determine if the volume is shared or private + if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { + return fmt.Errorf("relabel failed %s: %v", src, err) + } + } + + specgen.AddBindMount(src, dest, options) + } + + return nil +} + // CreateContainer creates a new container in specified PodSandbox func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { logrus.Debugf("CreateContainerRequest %+v", req) @@ -145,31 +176,8 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } } - mounts := containerConfig.GetMounts() - for _, mount := range mounts { - dest := mount.ContainerPath - if dest == "" { - return nil, fmt.Errorf("Mount.ContainerPath is empty") - } - - src := mount.HostPath - if src == "" { - return nil, fmt.Errorf("Mount.HostPath is empty") - } - - options := []string{"rw"} - if mount.Readonly { - options = []string{"ro"} - } - - if mount.SelinuxRelabel { - // Need a way in kubernetes to determine if the volume is shared or private - if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { - return nil, fmt.Errorf("relabel failed %s: %v", src, err) - } - } - - specgen.AddBindMount(src, dest, options) + if err := addOciBindMounts(sb, containerConfig, &specgen); err != nil { + return nil, err } labels := containerConfig.GetLabels()