From 322f7310e949c1d4a3bf943a0a3017772e2270f4 Mon Sep 17 00:00:00 2001 From: Crazykev Date: Tue, 13 Dec 2016 19:47:25 +0800 Subject: [PATCH] add redundant commit to , easy to review what was changed Signed-off-by: Crazykev --- manager/apparmor/aaparser.go | 89 +++++ manager/apparmor/apparmor_common.go | 14 + manager/apparmor/apparmor_supported.go | 145 ++++++++ manager/apparmor/apparmor_unsupported.go | 18 + manager/apparmor/template.go | 45 +++ manager/config.go | 154 ++++++++ manager/container.go | 36 ++ manager/container_attach.go | 11 + manager/container_create.go | 424 +++++++++++++++++++++++ manager/container_exec.go | 11 + manager/container_execsync.go | 46 +++ manager/container_list.go | 110 ++++++ manager/container_portforward.go | 11 + manager/container_remove.go | 53 +++ manager/container_start.go | 26 ++ manager/container_status.go | 59 ++++ manager/container_stop.go | 33 ++ manager/container_updateruntimeconfig.go | 11 + manager/image_list.go | 16 + manager/image_pull.go | 82 +++++ manager/image_remove.go | 13 + manager/image_status.go | 16 + manager/runtime_status.go | 41 +++ manager/sandbox.go | 270 +++++++++++++++ manager/sandbox_list.go | 92 +++++ manager/sandbox_remove.go | 95 +++++ manager/sandbox_run.go | 377 ++++++++++++++++++++ manager/sandbox_status.go | 62 ++++ manager/sandbox_stop.go | 61 ++++ manager/seccomp/seccomp.go | 149 ++++++++ manager/seccomp/seccomp_unsupported.go | 15 + manager/seccomp/types.go | 93 +++++ manager/server.go | 419 ++++++++++++++++++++++ manager/utils.go | 158 +++++++++ manager/version.go | 29 ++ 35 files changed, 3284 insertions(+) create mode 100644 manager/apparmor/aaparser.go create mode 100644 manager/apparmor/apparmor_common.go create mode 100644 manager/apparmor/apparmor_supported.go create mode 100644 manager/apparmor/apparmor_unsupported.go create mode 100644 manager/apparmor/template.go create mode 100644 manager/config.go create mode 100644 manager/container.go create mode 100644 manager/container_attach.go create mode 100644 manager/container_create.go create mode 100644 manager/container_exec.go create mode 100644 manager/container_execsync.go create mode 100644 manager/container_list.go create mode 100644 manager/container_portforward.go create mode 100644 manager/container_remove.go create mode 100644 manager/container_start.go create mode 100644 manager/container_status.go create mode 100644 manager/container_stop.go create mode 100644 manager/container_updateruntimeconfig.go create mode 100644 manager/image_list.go create mode 100644 manager/image_pull.go create mode 100644 manager/image_remove.go create mode 100644 manager/image_status.go create mode 100644 manager/runtime_status.go create mode 100644 manager/sandbox.go create mode 100644 manager/sandbox_list.go create mode 100644 manager/sandbox_remove.go create mode 100644 manager/sandbox_run.go create mode 100644 manager/sandbox_status.go create mode 100644 manager/sandbox_stop.go create mode 100644 manager/seccomp/seccomp.go create mode 100644 manager/seccomp/seccomp_unsupported.go create mode 100644 manager/seccomp/types.go create mode 100644 manager/server.go create mode 100644 manager/utils.go create mode 100644 manager/version.go diff --git a/manager/apparmor/aaparser.go b/manager/apparmor/aaparser.go new file mode 100644 index 00000000..7f0f02ac --- /dev/null +++ b/manager/apparmor/aaparser.go @@ -0,0 +1,89 @@ +// +build apparmor + +package apparmor + +import ( + "fmt" + "os/exec" + "strconv" + "strings" +) + +const ( + binary = "apparmor_parser" +) + +// 
GetVersion returns the version of apparmor_parser as a single numeric value (see parseVersion).
+func GetVersion() (int, error) {
+	output, err := cmd("", "--version")
+	if err != nil {
+		return -1, err
+	}
+
+	return parseVersion(output)
+}
+
+// LoadProfile runs `apparmor_parser -r` on a specified apparmor profile to
+// replace the profile.
+func LoadProfile(profilePath string) error {
+	_, err := cmd("", "-r", profilePath)
+	return err
+}
+
+// cmd runs `apparmor_parser` with the passed arguments.
+func cmd(dir string, arg ...string) (string, error) {
+	c := exec.Command(binary, arg...)
+	c.Dir = dir
+
+	output, err := c.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("running `%s %s` failed with output: %s\nerror: %v", c.Path, strings.Join(c.Args, " "), output, err)
+	}
+
+	return string(output), nil
+}
+
+// parseVersion takes the output from `apparmor_parser --version` and returns
+// a representation of the {major, minor, patch} version as a single number of
+// the form MMmmPPP (e.g. version 2.9.1 yields 209001).
+func parseVersion(output string) (int, error) {
+	// output is in the form of the following:
+	// AppArmor parser version 2.9.1
+	// Copyright (C) 1999-2008 Novell Inc.
+	// Copyright 2009-2012 Canonical Ltd.
+
+	lines := strings.SplitN(output, "\n", 2)
+	words := strings.Split(lines[0], " ")
+	version := words[len(words)-1]
+
+	// split by major minor version
+	v := strings.Split(version, ".")
+	if len(v) == 0 || len(v) > 3 {
+		return -1, fmt.Errorf("parsing version failed for output: `%s`", output)
+	}
+
+	// Default the versions to 0.
+	var majorVersion, minorVersion, patchLevel int
+
+	majorVersion, err := strconv.Atoi(v[0])
+	if err != nil {
+		return -1, err
+	}
+
+	if len(v) > 1 {
+		minorVersion, err = strconv.Atoi(v[1])
+		if err != nil {
+			return -1, err
+		}
+	}
+	if len(v) > 2 {
+		patchLevel, err = strconv.Atoi(v[2])
+		if err != nil {
+			return -1, err
+		}
+	}
+
+	// major*10^5 + minor*10^3 + patch*10^0
+	numericVersion := majorVersion*1e5 + minorVersion*1e3 + patchLevel
+	return numericVersion, nil
+}
diff --git a/manager/apparmor/apparmor_common.go b/manager/apparmor/apparmor_common.go
new file mode 100644
index 00000000..43670865
--- /dev/null
+++ b/manager/apparmor/apparmor_common.go
@@ -0,0 +1,14 @@
+package apparmor
+
+const (
+	// DefaultApparmorProfile is the name of the default apparmor profile.
+	DefaultApparmorProfile = "ocid-default"
+
+	// ContainerAnnotationKeyPrefix is the prefix to an annotation key specifying a container profile.
+	ContainerAnnotationKeyPrefix = "container.apparmor.security.beta.kubernetes.io/"
+
+	// ProfileRuntimeDefault is the profile specifying the runtime default.
+	ProfileRuntimeDefault = "runtime/default"
+	// ProfileNamePrefix is the prefix for specifying profiles loaded on the node.
+	ProfileNamePrefix = "localhost/"
+)
diff --git a/manager/apparmor/apparmor_supported.go b/manager/apparmor/apparmor_supported.go
new file mode 100644
index 00000000..d765c9de
--- /dev/null
+++ b/manager/apparmor/apparmor_supported.go
@@ -0,0 +1,145 @@
+// +build apparmor
+
+package apparmor
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path"
+	"strings"
+
+	"github.com/docker/docker/utils/templates"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+const (
+	// profileDirectory is the file store for apparmor profiles and macros.
+	profileDirectory = "/etc/apparmor.d"
+)
+
+// profileData holds information about the given profile for generation.
+type profileData struct {
+	// Name is the profile name.
+	Name string
+	// Imports defines the apparmor functions to import, before defining the profile.
+	Imports []string
+	// InnerImports defines the apparmor functions to import in the profile.
+	InnerImports []string
+	// Version is the {major, minor, patch} version of apparmor_parser as a single number.
+	Version int
+}
+
+// EnsureDefaultApparmorProfile loads the default apparmor profile if it is not already loaded.
+func EnsureDefaultApparmorProfile() error {
+	if apparmor.IsEnabled() {
+		loaded, err := IsLoaded(DefaultApparmorProfile)
+		if err != nil {
+			return fmt.Errorf("Could not check if %s AppArmor profile was loaded: %s", DefaultApparmorProfile, err)
+		}
+
+		// Nothing to do.
+		if loaded {
+			return nil
+		}
+
+		// Load the profile.
+		if err := InstallDefault(DefaultApparmorProfile); err != nil {
+			return fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", DefaultApparmorProfile)
+		}
+	}
+
+	return nil
+}
+
+// IsEnabled returns true if apparmor is enabled for the host.
+func IsEnabled() bool {
+	return apparmor.IsEnabled()
+}
+
+// GetProfileNameFromPodAnnotations gets the name of the profile to use with the
+// container from the pod annotations.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+	return annotations[ContainerAnnotationKeyPrefix+containerName]
+}
+
+// InstallDefault generates a default profile in a temp directory determined by
+// os.TempDir(), then loads the profile into the kernel using 'apparmor_parser'.
+func InstallDefault(name string) error {
+	p := profileData{
+		Name: name,
+	}
+
+	// Install to a temporary directory.
+	f, err := ioutil.TempFile("", name)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if err := p.generateDefault(f); err != nil {
+		return err
+	}
+
+	return LoadProfile(f.Name())
+}
+
+// IsLoaded checks if a profile with the given name has been loaded into the
+// kernel.
+func IsLoaded(name string) (bool, error) {
+	file, err := os.Open("/sys/kernel/security/apparmor/profiles")
+	if err != nil {
+		return false, err
+	}
+	defer file.Close()
+
+	r := bufio.NewReader(file)
+	for {
+		p, err := r.ReadString('\n')
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return false, err
+		}
+		if strings.HasPrefix(p, name+" ") {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
+
+// generateDefault creates an apparmor profile from ProfileData.
+func (p *profileData) generateDefault(out io.Writer) error {
+	compiled, err := templates.NewParse("apparmor_profile", baseTemplate)
+	if err != nil {
+		return err
+	}
+
+	if macroExists("tunables/global") {
+		p.Imports = append(p.Imports, "#include <tunables/global>")
+	} else {
+		p.Imports = append(p.Imports, "@{PROC}=/proc/")
+	}
+
+	if macroExists("abstractions/base") {
+		p.InnerImports = append(p.InnerImports, "#include <abstractions/base>")
+	}
+
+	ver, err := GetVersion()
+	if err != nil {
+		return err
+	}
+	p.Version = ver
+
+	return compiled.Execute(out, p)
+}
+
+// macroExists checks if the passed macro exists.
+func macroExists(m string) bool {
+	_, err := os.Stat(path.Join(profileDirectory, m))
+	return err == nil
+}
diff --git a/manager/apparmor/apparmor_unsupported.go b/manager/apparmor/apparmor_unsupported.go
new file mode 100644
index 00000000..fbd1d87a
--- /dev/null
+++ b/manager/apparmor/apparmor_unsupported.go
@@ -0,0 +1,18 @@
+// +build !apparmor
+
+package apparmor
+
+// IsEnabled returns false when built without the apparmor build tag.
+func IsEnabled() bool {
+	return false
+}
+
+// EnsureDefaultApparmorProfile does nothing when built without the apparmor build tag.
+func EnsureDefaultApparmorProfile() error {
+	return nil
+}
+
+// GetProfileNameFromPodAnnotations returns an empty string when built without the apparmor build tag.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+	return ""
+}
diff --git a/manager/apparmor/template.go b/manager/apparmor/template.go
new file mode 100644
index 00000000..6656ff61
--- /dev/null
+++ b/manager/apparmor/template.go
@@ -0,0 +1,45 @@
+// +build apparmor
+
+package apparmor
+
+// baseTemplate defines the default apparmor profile for containers.
+const baseTemplate = `
+{{range $value := .Imports}}
+{{$value}}
+{{end}}
+
+profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
+{{range $value := .InnerImports}}
+  {{$value}}
+{{end}}
+
+  network,
+  capability,
+  file,
+  umount,
+
+  deny @{PROC}/* w,   # deny write for all files directly in /proc (not in a subdir)
+  # deny write to files not in /proc/<number>/** or /proc/sys/**
+  deny @{PROC}/{[^1-9],[^1-9][^0-9],[^1-9s][^0-9y][^0-9s],[^1-9][^0-9][^0-9][^0-9]*}/** w,
+  deny @{PROC}/sys/[^k]** w,  # deny /proc/sys except /proc/sys/k* (effectively /proc/sys/kernel)
+  deny @{PROC}/sys/kernel/{?,??,[^s][^h][^m]**} w,  # deny everything except shm* in /proc/sys/kernel/
+  deny @{PROC}/sysrq-trigger rwklx,
+  deny @{PROC}/mem rwklx,
+  deny @{PROC}/kmem rwklx,
+  deny @{PROC}/kcore rwklx,
+
+  deny mount,
+
+  deny /sys/[^f]*/** wklx,
+  deny /sys/f[^s]*/** wklx,
+  deny /sys/fs/[^c]*/** wklx,
+  deny /sys/fs/c[^g]*/** wklx,
+  deny /sys/fs/cg[^r]*/** wklx,
+  deny /sys/firmware/** rwklx,
+  deny /sys/kernel/security/** rwklx,
+
+{{if ge .Version 208095}}
+  ptrace (trace,read) peer={{.Name}},
+{{end}}
+}
+`
diff --git a/manager/config.go b/manager/config.go
new file mode 100644
index 00000000..75e93aa3
--- /dev/null
+++ b/manager/config.go
@@ -0,0 +1,154 @@
+package server
+
+import (
+	"bytes"
+	"io/ioutil"
+
+	"github.com/BurntSushi/toml"
+)
+
+// Config represents the entire set of configuration values that can be set for
+// the server. This is intended to be loaded from a toml-encoded config file.
+type Config struct {
+	RootConfig
+	APIConfig
+	RuntimeConfig
+	ImageConfig
+}
+
+// This structure is necessary to fake the TOML tables when parsing,
+// while also not requiring a bunch of layered structs for no good
+// reason.
+
+// RootConfig represents the root of the "ocid" TOML config table.
+type RootConfig struct {
+	// Root is a path to the "root directory" where all information not
+	// explicitly handled by other options will be stored.
+	Root string `toml:"root"`
+
+	// SandboxDir is the directory where ocid will store all of its sandbox
+	// state and other information.
+	SandboxDir string `toml:"sandbox_dir"`
+
+	// ContainerDir is the directory where ocid will store all of its container
+	// state and other information.
+	ContainerDir string `toml:"container_dir"`
+
+	// LogDir is the default log directory where all logs will go unless kubelet
+	// tells us to put them somewhere else.
+	//
+	// TODO: This is currently unused until the conmon logging rewrite is done.
+	LogDir string `toml:"log_dir"`
+}
+
+// APIConfig represents the "ocid.api" TOML config table.
+type APIConfig struct {
+	// Listen is the path to the AF_LOCAL socket on which cri-o will listen.
+	// This may support proto://addr formats later, but currently this is just
+	// a path.
+ Listen string `toml:"listen"` +} + +// RuntimeConfig represents the "ocid.runtime" TOML config table. +type RuntimeConfig struct { + // Runtime is a path to the OCI runtime which ocid will be using. Currently + // the only known working choice is runC, simply because the OCI has not + // yet merged a CLI API (so we assume runC's API here). + Runtime string `toml:"runtime"` + + // Conmon is the path to conmon binary, used for managing the runtime. + Conmon string `toml:"conmon"` + + // ConmonEnv is the environment variable list for conmon process. + ConmonEnv []string `toml:"conmon_env"` + + // SELinux determines whether or not SELinux is used for pod separation. + SELinux bool `toml:"selinux"` + + // SeccompProfile is the seccomp json profile path which is used as the + // default for the runtime. + SeccompProfile string `toml:"seccomp_profile"` + + // ApparmorProfile is the apparmor profile name which is used as the + // default for the runtime. + ApparmorProfile string `toml:"apparmor_profile"` +} + +// ImageConfig represents the "ocid.image" TOML config table. +type ImageConfig struct { + // Pause is the path to the statically linked pause container binary, used + // as the entrypoint for infra containers. + // + // TODO(cyphar): This should be replaced with a path to an OCI image + // bundle, once the OCI image/storage code has been implemented. + Pause string `toml:"pause"` + + // ImageStore is the directory where the ocid image store will be stored. + // TODO: This is currently not really used because we don't have + // containers/storage integrated. + ImageDir string `toml:"image_dir"` +} + +// tomlConfig is another way of looking at a Config, which is +// TOML-friendly (it has all of the explicit tables). It's just used for +// conversions. +type tomlConfig struct { + Ocid struct { + RootConfig + API struct{ APIConfig } `toml:"api"` + Runtime struct{ RuntimeConfig } `toml:"runtime"` + Image struct{ ImageConfig } `toml:"image"` + } `toml:"ocid"` +} + +func (t *tomlConfig) toConfig(c *Config) { + c.RootConfig = t.Ocid.RootConfig + c.APIConfig = t.Ocid.API.APIConfig + c.RuntimeConfig = t.Ocid.Runtime.RuntimeConfig + c.ImageConfig = t.Ocid.Image.ImageConfig +} + +func (t *tomlConfig) fromConfig(c *Config) { + t.Ocid.RootConfig = c.RootConfig + t.Ocid.API.APIConfig = c.APIConfig + t.Ocid.Runtime.RuntimeConfig = c.RuntimeConfig + t.Ocid.Image.ImageConfig = c.ImageConfig +} + +// FromFile populates the Config from the TOML-encoded file at the given path. +// Returns errors encountered when reading or parsing the files, or nil +// otherwise. +func (c *Config) FromFile(path string) error { + data, err := ioutil.ReadFile(path) + if err != nil { + return err + } + + t := new(tomlConfig) + t.fromConfig(c) + + _, err = toml.Decode(string(data), t) + if err != nil { + return err + } + + t.toConfig(c) + return nil +} + +// ToFile outputs the given Config as a TOML-encoded file at the given path. +// Returns errors encountered when generating or writing the file, or nil +// otherwise. 
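+//
+// For illustration only (the paths and values below are examples, not
+// defaults shipped by this patch), a config file in this layout looks like:
+//
+//	[ocid]
+//	root = "/var/lib/ocid"
+//	sandbox_dir = "/var/lib/ocid/sandboxes"
+//	container_dir = "/var/lib/ocid/containers"
+//	log_dir = "/var/log/ocid/pods"
+//
+//	[ocid.api]
+//	listen = "/var/run/ocid.sock"
+//
+//	[ocid.runtime]
+//	runtime = "/usr/bin/runc"
+//	conmon = "/usr/libexec/ocid/conmon"
+//	conmon_env = ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"]
+//	selinux = false
+//	seccomp_profile = "/etc/ocid/seccomp.json"
+//	apparmor_profile = "ocid-default"
+//
+//	[ocid.image]
+//	pause = "/usr/libexec/ocid/pause"
+//	image_dir = "/var/lib/ocid/store"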
+func (c *Config) ToFile(path string) error { + var w bytes.Buffer + e := toml.NewEncoder(&w) + + t := new(tomlConfig) + t.fromConfig(c) + + if err := e.Encode(*t); err != nil { + return err + } + + return ioutil.WriteFile(path, w.Bytes(), 0644) +} diff --git a/manager/container.go b/manager/container.go new file mode 100644 index 00000000..b8b43ae7 --- /dev/null +++ b/manager/container.go @@ -0,0 +1,36 @@ +package server + +import ( + "fmt" + + "github.com/kubernetes-incubator/cri-o/oci" +) + +const ( + // containerTypeSandbox represents a pod sandbox container + containerTypeSandbox = "sandbox" + // containerTypeContainer represents a container running within a pod + containerTypeContainer = "container" +) + +type containerRequest interface { + GetContainerId() string +} + +func (s *Server) getContainerFromRequest(req containerRequest) (*oci.Container, error) { + ctrID := req.GetContainerId() + if ctrID == "" { + return nil, fmt.Errorf("container ID should not be empty") + } + + containerID, err := s.ctrIDIndex.Get(ctrID) + if err != nil { + return nil, fmt.Errorf("container with ID starting with %s not found: %v", ctrID, err) + } + + c := s.state.containers.Get(containerID) + if c == nil { + return nil, fmt.Errorf("specified container not found: %s", containerID) + } + return c, nil +} diff --git a/manager/container_attach.go b/manager/container_attach.go new file mode 100644 index 00000000..96e2676b --- /dev/null +++ b/manager/container_attach.go @@ -0,0 +1,11 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// Attach prepares a streaming endpoint to attach to a running container. +func (s *Server) Attach(ctx context.Context, req *pb.AttachRequest) (*pb.AttachResponse, error) { + return nil, nil +} diff --git a/manager/container_create.go b/manager/container_create.go new file mode 100644 index 00000000..2afa3bd5 --- /dev/null +++ b/manager/container_create.go @@ -0,0 +1,424 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/stringid" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/seccomp" + "github.com/kubernetes-incubator/cri-o/utils" + "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runtime-tools/generate" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +const ( + seccompUnconfined = "unconfined" + seccompRuntimeDefault = "runtime/default" + seccompLocalhostPrefix = "localhost/" +) + +// CreateContainer creates a new container in specified PodSandbox +func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { + logrus.Debugf("CreateContainerRequest %+v", req) + sbID := req.GetPodSandboxId() + if sbID == "" { + return nil, fmt.Errorf("PodSandboxId should not be empty") + } + + sandboxID, err := s.podIDIndex.Get(sbID) + if err != nil { + return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err) + } + + sb := s.getSandbox(sandboxID) + if sb == nil { + return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID) + } + + // The config of the container + containerConfig := req.GetConfig() + if containerConfig == nil { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig is nil") + } + + 
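+	// The sandbox name plus the (name, attempt) pair from the metadata is
+	// what generateContainerIDandName uses below to build a unique container name.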
name := containerConfig.GetMetadata().GetName() + if name == "" { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Name is empty") + } + + attempt := containerConfig.GetMetadata().GetAttempt() + containerID, containerName, err := s.generateContainerIDandName(sb.name, name, attempt) + if err != nil { + return nil, err + } + + // containerDir is the dir for the container bundle. + containerDir := filepath.Join(s.runtime.ContainerDir(), containerID) + defer func() { + if err != nil { + s.releaseContainerName(containerName) + err1 := os.RemoveAll(containerDir) + if err1 != nil { + logrus.Warnf("Failed to cleanup container directory: %v", err1) + } + } + }() + + if _, err = os.Stat(containerDir); err == nil { + return nil, fmt.Errorf("container (%s) already exists", containerDir) + } + + if err = os.MkdirAll(containerDir, 0755); err != nil { + return nil, err + } + + container, err := s.createSandboxContainer(containerID, containerName, sb, containerDir, containerConfig) + if err != nil { + return nil, err + } + + if err = s.runtime.CreateContainer(container); err != nil { + return nil, err + } + + if err = s.runtime.UpdateStatus(container); err != nil { + return nil, err + } + + s.addContainer(container) + + if err = s.ctrIDIndex.Add(containerID); err != nil { + s.removeContainer(container) + return nil, err + } + + resp := &pb.CreateContainerResponse{ + ContainerId: &containerID, + } + + logrus.Debugf("CreateContainerResponse: %+v", resp) + return resp, nil +} + +func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) { + if sb == nil { + return nil, errors.New("createSandboxContainer needs a sandbox") + } + // creates a spec Generator with the default spec. + specgen := generate.New() + + // by default, the root path is an empty string. + // here set it to be "rootfs". 
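+	// The path is interpreted relative to the bundle directory (containerDir),
+	// so the rootfs is expected at <containerDir>/rootfs per the OCI runtime spec.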
+ specgen.SetRootPath("rootfs") + + args := containerConfig.GetArgs() + if args == nil { + args = []string{"/bin/sh"} + } + specgen.SetProcessArgs(args) + + cwd := containerConfig.GetWorkingDir() + if cwd == "" { + cwd = "/" + } + specgen.SetProcessCwd(cwd) + + envs := containerConfig.GetEnvs() + if envs != nil { + for _, item := range envs { + key := item.GetKey() + value := item.GetValue() + if key == "" { + continue + } + env := fmt.Sprintf("%s=%s", key, value) + specgen.AddProcessEnv(env) + } + } + + mounts := containerConfig.GetMounts() + for _, mount := range mounts { + dest := mount.GetContainerPath() + if dest == "" { + return nil, fmt.Errorf("Mount.ContainerPath is empty") + } + + src := mount.GetHostPath() + if src == "" { + return nil, fmt.Errorf("Mount.HostPath is empty") + } + + options := []string{"rw"} + if mount.GetReadonly() { + options = []string{"ro"} + } + + if mount.GetSelinuxRelabel() { + // Need a way in kubernetes to determine if the volume is shared or private + if err := label.Relabel(src, sb.mountLabel, true); err != nil && err != syscall.ENOTSUP { + return nil, fmt.Errorf("relabel failed %s: %v", src, err) + } + } + + specgen.AddBindMount(src, dest, options) + } + + labels := containerConfig.GetLabels() + + metadata := containerConfig.GetMetadata() + + annotations := containerConfig.GetAnnotations() + if annotations != nil { + for k, v := range annotations { + specgen.AddAnnotation(k, v) + } + } + + // set this container's apparmor profile if it is set by sandbox + if s.appArmorEnabled { + appArmorProfileName := s.getAppArmorProfileName(sb.annotations, metadata.GetName()) + if appArmorProfileName != "" { + // reload default apparmor profile if it is unloaded. + if s.appArmorProfile == apparmor.DefaultApparmorProfile { + if err := apparmor.EnsureDefaultApparmorProfile(); err != nil { + return nil, err + } + } + + specgen.SetProcessApparmorProfile(appArmorProfileName) + } + } + + if containerConfig.GetLinux().GetSecurityContext().GetPrivileged() { + specgen.SetupPrivileged(true) + } + + if containerConfig.GetLinux().GetSecurityContext().GetReadonlyRootfs() { + specgen.SetRootReadonly(true) + } + + logPath := containerConfig.GetLogPath() + + if containerConfig.GetTty() { + specgen.SetProcessTerminal(true) + } + + linux := containerConfig.GetLinux() + if linux != nil { + resources := linux.GetResources() + if resources != nil { + cpuPeriod := resources.GetCpuPeriod() + if cpuPeriod != 0 { + specgen.SetLinuxResourcesCPUPeriod(uint64(cpuPeriod)) + } + + cpuQuota := resources.GetCpuQuota() + if cpuQuota != 0 { + specgen.SetLinuxResourcesCPUQuota(uint64(cpuQuota)) + } + + cpuShares := resources.GetCpuShares() + if cpuShares != 0 { + specgen.SetLinuxResourcesCPUShares(uint64(cpuShares)) + } + + memoryLimit := resources.GetMemoryLimitInBytes() + if memoryLimit != 0 { + specgen.SetLinuxResourcesMemoryLimit(uint64(memoryLimit)) + } + + oomScoreAdj := resources.GetOomScoreAdj() + specgen.SetLinuxResourcesOOMScoreAdj(int(oomScoreAdj)) + } + + capabilities := linux.GetSecurityContext().GetCapabilities() + if capabilities != nil { + addCaps := capabilities.GetAddCapabilities() + if addCaps != nil { + for _, cap := range addCaps { + if err := specgen.AddProcessCapability(cap); err != nil { + return nil, err + } + } + } + + dropCaps := capabilities.GetDropCapabilities() + if dropCaps != nil { + for _, cap := range dropCaps { + if err := specgen.DropProcessCapability(cap); err != nil { + return nil, err + } + } + } + } + + specgen.SetProcessSelinuxLabel(sb.processLabel) + 
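+		// Use the sandbox's mount label here as well: the process and mount
+		// labels are allocated as a pair when the sandbox is created.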
specgen.SetLinuxMountLabel(sb.mountLabel) + + user := linux.GetSecurityContext().GetRunAsUser() + specgen.SetProcessUID(uint32(user)) + + specgen.SetProcessGID(uint32(user)) + + groups := linux.GetSecurityContext().GetSupplementalGroups() + for _, group := range groups { + specgen.AddProcessAdditionalGid(uint32(group)) + } + } + // Join the namespace paths for the pod sandbox container. + podInfraState := s.runtime.ContainerStatus(sb.infraContainer) + + logrus.Debugf("pod container state %+v", podInfraState) + + ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid) + if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil { + return nil, err + } + + netNsPath := sb.netNsPath() + if netNsPath == "" { + // The sandbox does not have a permanent namespace, + // it's on the host one. + netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid) + } + + if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil { + return nil, err + } + + imageSpec := containerConfig.GetImage() + if imageSpec == nil { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image is nil") + } + + image := imageSpec.GetImage() + if image == "" { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image.Image is empty") + } + + // bind mount the pod shm + specgen.AddBindMount(sb.shmPath, "/dev/shm", []string{"rw"}) + + specgen.AddAnnotation("ocid/name", containerName) + specgen.AddAnnotation("ocid/sandbox_id", sb.id) + specgen.AddAnnotation("ocid/sandbox_name", sb.infraContainer.Name()) + specgen.AddAnnotation("ocid/container_type", containerTypeContainer) + specgen.AddAnnotation("ocid/log_path", logPath) + specgen.AddAnnotation("ocid/tty", fmt.Sprintf("%v", containerConfig.GetTty())) + specgen.AddAnnotation("ocid/image", image) + + metadataJSON, err := json.Marshal(metadata) + if err != nil { + return nil, err + } + specgen.AddAnnotation("ocid/metadata", string(metadataJSON)) + + labelsJSON, err := json.Marshal(labels) + if err != nil { + return nil, err + } + specgen.AddAnnotation("ocid/labels", string(labelsJSON)) + + annotationsJSON, err := json.Marshal(annotations) + if err != nil { + return nil, err + } + specgen.AddAnnotation("ocid/annotations", string(annotationsJSON)) + + if err = s.setupSeccomp(&specgen, containerName, sb.annotations); err != nil { + return nil, err + } + + if err = specgen.SaveToFile(filepath.Join(containerDir, "config.json"), generate.ExportOptions{}); err != nil { + return nil, err + } + + // TODO: copy the rootfs into the bundle. + // Currently, utils.CreateFakeRootfs is used to populate the rootfs. 
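+	// Note: this fake rootfs is a stand-in until the containers/storage
+	// integration referenced elsewhere in this patch is wired up.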
+ if err = utils.CreateFakeRootfs(containerDir, image); err != nil { + return nil, err + } + + container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty()) + if err != nil { + return nil, err + } + + return container, nil +} + +func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error { + profile, ok := sbAnnotations["security.alpha.kubernetes.io/seccomp/container/"+cname] + if !ok { + profile, ok = sbAnnotations["security.alpha.kubernetes.io/seccomp/pod"] + if !ok { + // running w/o seccomp, aka unconfined + profile = seccompUnconfined + } + } + if !s.seccompEnabled { + if profile != seccompUnconfined { + return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile") + } + logrus.Warn("seccomp is not enabled in your kernel, running container without profile") + } + if profile == seccompUnconfined { + // running w/o seccomp, aka unconfined + specgen.Spec().Linux.Seccomp = nil + return nil + } + if profile == seccompRuntimeDefault { + return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen) + } + if !strings.HasPrefix(profile, seccompLocalhostPrefix) { + return fmt.Errorf("unknown seccomp profile option: %q", profile) + } + //file, err := ioutil.ReadFile(filepath.Join(s.seccompProfileRoot, strings.TrimPrefix(profile, seccompLocalhostPrefix))) + //if err != nil { + //return err + //} + // TODO(runcom): setup from provided node's seccomp profile + // can't do this yet, see https://issues.k8s.io/36997 + return nil +} + +func (s *Server) generateContainerIDandName(podName string, name string, attempt uint32) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + nameStr := fmt.Sprintf("%s-%s-%v", podName, name, attempt) + if name == "infra" { + nameStr = fmt.Sprintf("%s-%s", podName, name) + } + if name, err = s.reserveContainerName(id, nameStr); err != nil { + return "", "", err + } + return id, name, err +} + +// getAppArmorProfileName gets the profile name for the given container. +func (s *Server) getAppArmorProfileName(annotations map[string]string, ctrName string) string { + profile := apparmor.GetProfileNameFromPodAnnotations(annotations, ctrName) + + if profile == "" { + return "" + } + + if profile == apparmor.ProfileRuntimeDefault { + // If the value is runtime/default, then return default profile. + return s.appArmorProfile + } + + return strings.TrimPrefix(profile, apparmor.ProfileNamePrefix) +} diff --git a/manager/container_exec.go b/manager/container_exec.go new file mode 100644 index 00000000..9f1c400e --- /dev/null +++ b/manager/container_exec.go @@ -0,0 +1,11 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// Exec prepares a streaming endpoint to execute a command in the container. +func (s *Server) Exec(ctx context.Context, req *pb.ExecRequest) (*pb.ExecResponse, error) { + return nil, nil +} diff --git a/manager/container_execsync.go b/manager/container_execsync.go new file mode 100644 index 00000000..3acf0abb --- /dev/null +++ b/manager/container_execsync.go @@ -0,0 +1,46 @@ +package server + +import ( + "fmt" + + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// ExecSync runs a command in a container synchronously. 
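+// It returns the captured stdout, stderr and exit code once the command finishes.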
+func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.ExecSyncResponse, error) { + logrus.Debugf("ExecSyncRequest %+v", req) + c, err := s.getContainerFromRequest(req) + if err != nil { + return nil, err + } + + if err = s.runtime.UpdateStatus(c); err != nil { + return nil, err + } + + cState := s.runtime.ContainerStatus(c) + if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + return nil, fmt.Errorf("container is not created or running") + } + + cmd := req.GetCmd() + if cmd == nil { + return nil, fmt.Errorf("exec command cannot be empty") + } + + execResp, err := s.runtime.ExecSync(c, cmd, req.GetTimeout()) + if err != nil { + return nil, err + } + resp := &pb.ExecSyncResponse{ + Stdout: execResp.Stdout, + Stderr: execResp.Stderr, + ExitCode: &execResp.ExitCode, + } + + logrus.Debugf("ExecSyncResponse: %+v", resp) + return resp, nil +} diff --git a/manager/container_list.go b/manager/container_list.go new file mode 100644 index 00000000..776b85bc --- /dev/null +++ b/manager/container_list.go @@ -0,0 +1,110 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + "k8s.io/kubernetes/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// filterContainer returns whether passed container matches filtering criteria +func filterContainer(c *pb.Container, filter *pb.ContainerFilter) bool { + if filter != nil { + if filter.State != nil { + if *c.State != *filter.State { + return false + } + } + if filter.LabelSelector != nil { + sel := fields.SelectorFromSet(filter.LabelSelector) + if !sel.Matches(fields.Set(c.Labels)) { + return false + } + } + } + return true +} + +// ListContainers lists all containers by filters. +func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (*pb.ListContainersResponse, error) { + logrus.Debugf("ListContainersRequest %+v", req) + var ctrs []*pb.Container + filter := req.Filter + ctrList := s.state.containers.List() + + // Filter using container id and pod id first. 
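+	// State and label criteria are applied later, per container, by filterContainer.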
+ if filter != nil { + if filter.Id != nil { + id, err := s.ctrIDIndex.Get(*filter.Id) + if err != nil { + return nil, err + } + c := s.state.containers.Get(id) + if c != nil { + if filter.PodSandboxId != nil { + if c.Sandbox() == *filter.PodSandboxId { + ctrList = []*oci.Container{c} + } else { + ctrList = []*oci.Container{} + } + + } else { + ctrList = []*oci.Container{c} + } + } + } else { + if filter.PodSandboxId != nil { + pod := s.state.sandboxes[*filter.PodSandboxId] + if pod == nil { + ctrList = []*oci.Container{} + } else { + ctrList = pod.containers.List() + } + } + } + } + + for _, ctr := range ctrList { + if err := s.runtime.UpdateStatus(ctr); err != nil { + return nil, err + } + + podSandboxID := ctr.Sandbox() + cState := s.runtime.ContainerStatus(ctr) + created := cState.Created.UnixNano() + rState := pb.ContainerState_CONTAINER_UNKNOWN + cID := ctr.ID() + + c := &pb.Container{ + Id: &cID, + PodSandboxId: &podSandboxID, + CreatedAt: int64Ptr(created), + Labels: ctr.Labels(), + Metadata: ctr.Metadata(), + Annotations: ctr.Annotations(), + Image: ctr.Image(), + } + + switch cState.Status { + case oci.ContainerStateCreated: + rState = pb.ContainerState_CONTAINER_CREATED + case oci.ContainerStateRunning: + rState = pb.ContainerState_CONTAINER_RUNNING + case oci.ContainerStateStopped: + rState = pb.ContainerState_CONTAINER_EXITED + } + c.State = &rState + + // Filter by other criteria such as state and labels. + if filterContainer(c, req.Filter) { + ctrs = append(ctrs, c) + } + } + + resp := &pb.ListContainersResponse{ + Containers: ctrs, + } + logrus.Debugf("ListContainersResponse: %+v", resp) + return resp, nil +} diff --git a/manager/container_portforward.go b/manager/container_portforward.go new file mode 100644 index 00000000..ab665c23 --- /dev/null +++ b/manager/container_portforward.go @@ -0,0 +1,11 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// PortForward prepares a streaming endpoint to forward ports from a PodSandbox. +func (s *Server) PortForward(ctx context.Context, req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) { + return nil, nil +} diff --git a/manager/container_remove.go b/manager/container_remove.go new file mode 100644 index 00000000..37c318ff --- /dev/null +++ b/manager/container_remove.go @@ -0,0 +1,53 @@ +package server + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// RemoveContainer removes the container. If the container is running, the container +// should be force removed. 
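+// Containers in the created or running state are stopped before deletion.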
+func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) { + logrus.Debugf("RemoveContainerRequest %+v", req) + c, err := s.getContainerFromRequest(req) + if err != nil { + return nil, err + } + + if err := s.runtime.UpdateStatus(c); err != nil { + return nil, fmt.Errorf("failed to update container state: %v", err) + } + + cState := s.runtime.ContainerStatus(c) + if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { + if err := s.runtime.StopContainer(c); err != nil { + return nil, fmt.Errorf("failed to stop container %s: %v", c.ID(), err) + } + } + + if err := s.runtime.DeleteContainer(c); err != nil { + return nil, fmt.Errorf("failed to delete container %s: %v", c.ID(), err) + } + + containerDir := filepath.Join(s.runtime.ContainerDir(), c.ID()) + if err := os.RemoveAll(containerDir); err != nil { + return nil, fmt.Errorf("failed to remove container %s directory: %v", c.ID(), err) + } + + s.releaseContainerName(c.Name()) + s.removeContainer(c) + + if err := s.ctrIDIndex.Delete(c.ID()); err != nil { + return nil, err + } + + resp := &pb.RemoveContainerResponse{} + logrus.Debugf("RemoveContainerResponse: %+v", resp) + return resp, nil +} diff --git a/manager/container_start.go b/manager/container_start.go new file mode 100644 index 00000000..5e724865 --- /dev/null +++ b/manager/container_start.go @@ -0,0 +1,26 @@ +package server + +import ( + "fmt" + + "github.com/Sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// StartContainer starts the container. +func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (*pb.StartContainerResponse, error) { + logrus.Debugf("StartContainerRequest %+v", req) + c, err := s.getContainerFromRequest(req) + if err != nil { + return nil, err + } + + if err := s.runtime.StartContainer(c); err != nil { + return nil, fmt.Errorf("failed to start container %s: %v", c.ID(), err) + } + + resp := &pb.StartContainerResponse{} + logrus.Debugf("StartContainerResponse %+v", resp) + return resp, nil +} diff --git a/manager/container_status.go b/manager/container_status.go new file mode 100644 index 00000000..fa07c89c --- /dev/null +++ b/manager/container_status.go @@ -0,0 +1,59 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// ContainerStatus returns status of the container. 
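+// Timestamps in the response are expressed in nanoseconds (UnixNano).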
+func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error) {
+	logrus.Debugf("ContainerStatusRequest %+v", req)
+	c, err := s.getContainerFromRequest(req)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := s.runtime.UpdateStatus(c); err != nil {
+		return nil, err
+	}
+
+	containerID := c.ID()
+	resp := &pb.ContainerStatusResponse{
+		Status: &pb.ContainerStatus{
+			Id:       &containerID,
+			Metadata: c.Metadata(),
+		},
+	}
+
+	cState := s.runtime.ContainerStatus(c)
+	rStatus := pb.ContainerState_CONTAINER_UNKNOWN
+
+	switch cState.Status {
+	case oci.ContainerStateCreated:
+		rStatus = pb.ContainerState_CONTAINER_CREATED
+		created := cState.Created.UnixNano()
+		resp.Status.CreatedAt = int64Ptr(created)
+	case oci.ContainerStateRunning:
+		rStatus = pb.ContainerState_CONTAINER_RUNNING
+		created := cState.Created.UnixNano()
+		resp.Status.CreatedAt = int64Ptr(created)
+		started := cState.Started.UnixNano()
+		resp.Status.StartedAt = int64Ptr(started)
+	case oci.ContainerStateStopped:
+		rStatus = pb.ContainerState_CONTAINER_EXITED
+		created := cState.Created.UnixNano()
+		resp.Status.CreatedAt = int64Ptr(created)
+		started := cState.Started.UnixNano()
+		resp.Status.StartedAt = int64Ptr(started)
+		finished := cState.Finished.UnixNano()
+		resp.Status.FinishedAt = int64Ptr(finished)
+		resp.Status.ExitCode = int32Ptr(cState.ExitCode)
+	}
+
+	resp.Status.State = &rStatus
+
+	logrus.Debugf("ContainerStatusResponse: %+v", resp)
+	return resp, nil
+}
diff --git a/manager/container_stop.go b/manager/container_stop.go
new file mode 100644
index 00000000..1aba8801
--- /dev/null
+++ b/manager/container_stop.go
@@ -0,0 +1,33 @@
+package server
+
+import (
+	"fmt"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/kubernetes-incubator/cri-o/oci"
+	"golang.org/x/net/context"
+	pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
+)
+
+// StopContainer stops a running container with a grace period (i.e., timeout).
+func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) {
+	logrus.Debugf("StopContainerRequest %+v", req)
+	c, err := s.getContainerFromRequest(req)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := s.runtime.UpdateStatus(c); err != nil {
+		return nil, err
+	}
+	cStatus := s.runtime.ContainerStatus(c)
+	if cStatus.Status != oci.ContainerStateStopped {
+		if err := s.runtime.StopContainer(c); err != nil {
+			return nil, fmt.Errorf("failed to stop container %s: %v", c.ID(), err)
+		}
+	}
+
+	resp := &pb.StopContainerResponse{}
+	logrus.Debugf("StopContainerResponse: %+v", resp)
+	return resp, nil
+}
diff --git a/manager/container_updateruntimeconfig.go b/manager/container_updateruntimeconfig.go
new file mode 100644
index 00000000..954ae047
--- /dev/null
+++ b/manager/container_updateruntimeconfig.go
@@ -0,0 +1,11 @@
+package server
+
+import (
+	"golang.org/x/net/context"
+	pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
+)
+
+// UpdateRuntimeConfig updates the runtime configuration passed down by the kubelet; currently a no-op.
+func (s *Server) UpdateRuntimeConfig(ctx context.Context, req *pb.UpdateRuntimeConfigRequest) (*pb.UpdateRuntimeConfigResponse, error) { + return nil, nil +} diff --git a/manager/image_list.go b/manager/image_list.go new file mode 100644 index 00000000..964dbd74 --- /dev/null +++ b/manager/image_list.go @@ -0,0 +1,16 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// ListImages lists existing images. +func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (*pb.ListImagesResponse, error) { + logrus.Debugf("ListImages: %+v", req) + // TODO + // containers/storage will take care of this by looking inside /var/lib/ocid/images + // and listing images. + return &pb.ListImagesResponse{}, nil +} diff --git a/manager/image_pull.go b/manager/image_pull.go new file mode 100644 index 00000000..96ca09b5 --- /dev/null +++ b/manager/image_pull.go @@ -0,0 +1,82 @@ +package server + +import ( + "errors" + "io" + "os" + "path/filepath" + + "github.com/Sirupsen/logrus" + "github.com/containers/image/directory" + "github.com/containers/image/image" + "github.com/containers/image/transports" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// PullImage pulls a image with authentication config. +func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.PullImageResponse, error) { + logrus.Debugf("PullImage: %+v", req) + img := req.GetImage().GetImage() + if img == "" { + return nil, errors.New("got empty imagespec name") + } + + // TODO(runcom): deal with AuthConfig in req.GetAuth() + + // TODO(mrunalp,runcom): why do we need the SandboxConfig here? + // how do we pull in a specified sandbox? + tr, err := transports.ParseImageName(img) + if err != nil { + return nil, err + } + // TODO(runcom): figure out the ImageContext story in containers/image instead of passing ("", true) + src, err := tr.NewImageSource(nil, nil) + if err != nil { + return nil, err + } + i := image.FromSource(src) + blobs, err := i.BlobDigests() + if err != nil { + return nil, err + } + + if err = os.Mkdir(filepath.Join(s.config.ImageDir, tr.StringWithinTransport()), 0755); err != nil { + return nil, err + } + dir, err := directory.NewReference(filepath.Join(s.config.ImageDir, tr.StringWithinTransport())) + if err != nil { + return nil, err + } + // TODO(runcom): figure out the ImageContext story in containers/image instead of passing ("", true) + dest, err := dir.NewImageDestination(nil) + if err != nil { + return nil, err + } + // save blobs (layer + config for docker v2s2, layers only for docker v2s1 [the config is in the manifest]) + for _, b := range blobs { + // TODO(runcom,nalin): we need do-then-commit to later purge on error + var r io.ReadCloser + r, _, err = src.GetBlob(b) + if err != nil { + return nil, err + } + if _, _, err = dest.PutBlob(r, b, -1); err != nil { + r.Close() + return nil, err + } + r.Close() + } + // save manifest + m, _, err := i.Manifest() + if err != nil { + return nil, err + } + if err := dest.PutManifest(m); err != nil { + return nil, err + } + + // TODO: what else do we need here? 
(Signatures when the story isn't just pulling from docker://) + + return &pb.PullImageResponse{}, nil +} diff --git a/manager/image_remove.go b/manager/image_remove.go new file mode 100644 index 00000000..21bf30ff --- /dev/null +++ b/manager/image_remove.go @@ -0,0 +1,13 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// RemoveImage removes the image. +func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (*pb.RemoveImageResponse, error) { + logrus.Debugf("RemoveImage: %+v", req) + return &pb.RemoveImageResponse{}, nil +} diff --git a/manager/image_status.go b/manager/image_status.go new file mode 100644 index 00000000..4ab113d5 --- /dev/null +++ b/manager/image_status.go @@ -0,0 +1,16 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// ImageStatus returns the status of the image. +func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (*pb.ImageStatusResponse, error) { + logrus.Debugf("ImageStatus: %+v", req) + // TODO + // containers/storage will take care of this by looking inside /var/lib/ocid/images + // and getting the image status + return &pb.ImageStatusResponse{}, nil +} diff --git a/manager/runtime_status.go b/manager/runtime_status.go new file mode 100644 index 00000000..611dc2f4 --- /dev/null +++ b/manager/runtime_status.go @@ -0,0 +1,41 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// Status returns the status of the runtime +func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (*pb.StatusResponse, error) { + + // Deal with Runtime conditions + runtimeReady, err := s.runtime.RuntimeReady() + if err != nil { + return nil, err + } + networkReady, err := s.runtime.NetworkReady() + if err != nil { + return nil, err + } + + // Use vendored strings + runtimeReadyConditionString := pb.RuntimeReady + networkReadyConditionString := pb.NetworkReady + + resp := &pb.StatusResponse{ + Status: &pb.RuntimeStatus{ + Conditions: []*pb.RuntimeCondition{ + &pb.RuntimeCondition{ + Type: &runtimeReadyConditionString, + Status: &runtimeReady, + }, + &pb.RuntimeCondition{ + Type: &networkReadyConditionString, + Status: &networkReady, + }, + }, + }, + } + + return resp, nil +} diff --git a/manager/sandbox.go b/manager/sandbox.go new file mode 100644 index 00000000..7e9a38b5 --- /dev/null +++ b/manager/sandbox.go @@ -0,0 +1,270 @@ +package server + +import ( + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/stringid" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/containernetworking/cni/pkg/ns" + "k8s.io/kubernetes/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +type sandboxNetNs struct { + sync.Mutex + ns ns.NetNS + symlink *os.File + closed bool +} + +func (ns *sandboxNetNs) symlinkCreate(name string) error { + b := make([]byte, 4) + _, randErr := rand.Reader.Read(b) + if randErr != nil { + return randErr + } + + nsName := fmt.Sprintf("%s-%x", name, b) + symlinkPath := filepath.Join(nsRunDir, nsName) + + if err := os.Symlink(ns.ns.Path(), symlinkPath); err != nil { + return err + } + + fd, err := os.Open(symlinkPath) + if err != nil { + if removeErr := os.RemoveAll(symlinkPath); removeErr != nil { + return removeErr + } + + return 
err + } + + ns.symlink = fd + + return nil +} + +func (ns *sandboxNetNs) symlinkRemove() error { + if err := ns.symlink.Close(); err != nil { + return err + } + + return os.RemoveAll(ns.symlink.Name()) +} + +func isSymbolicLink(path string) (bool, error) { + fi, err := os.Lstat(path) + if err != nil { + return false, err + } + + return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil +} + +func netNsGet(nspath, name string) (*sandboxNetNs, error) { + if err := ns.IsNSorErr(nspath); err != nil { + return nil, errSandboxClosedNetNS + } + + symlink, symlinkErr := isSymbolicLink(nspath) + if symlinkErr != nil { + return nil, symlinkErr + } + + var resolvedNsPath string + if symlink { + path, err := os.Readlink(nspath) + if err != nil { + return nil, err + } + resolvedNsPath = path + } else { + resolvedNsPath = nspath + } + + netNS, err := ns.GetNS(resolvedNsPath) + if err != nil { + return nil, err + } + + netNs := &sandboxNetNs{ns: netNS, closed: false,} + + if symlink { + fd, err := os.Open(nspath) + if err != nil { + return nil, err + } + + netNs.symlink = fd + } else { + if err := netNs.symlinkCreate(name); err != nil { + return nil, err + } + } + + return netNs, nil +} + +func hostNetNsPath() (string, error) { + netNS, err := ns.GetCurrentNS() + if err != nil { + return "", err + } + + defer netNS.Close() + + return netNS.Path(), nil +} + +type sandbox struct { + id string + name string + logDir string + labels fields.Set + annotations map[string]string + infraContainer *oci.Container + containers oci.Store + processLabel string + mountLabel string + netns *sandboxNetNs + metadata *pb.PodSandboxMetadata + shmPath string +} + +const ( + podDefaultNamespace = "default" + defaultShmSize = 64 * 1024 * 1024 + nsRunDir = "/var/run/netns" +) + +var ( + errSandboxIDEmpty = errors.New("PodSandboxId should not be empty") + errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed") +) + +func (s *sandbox) addContainer(c *oci.Container) { + s.containers.Add(c.Name(), c) +} + +func (s *sandbox) getContainer(name string) *oci.Container { + return s.containers.Get(name) +} + +func (s *sandbox) removeContainer(c *oci.Container) { + s.containers.Delete(c.Name()) +} + +func (s *sandbox) netNs() ns.NetNS { + if s.netns == nil { + return nil + } + + return s.netns.ns +} + +func (s *sandbox) netNsPath() string { + if s.netns == nil { + return "" + } + + return s.netns.symlink.Name() +} + +func (s *sandbox) netNsCreate() error { + if s.netns != nil { + return fmt.Errorf("net NS already created") + } + + netNS, err := ns.NewNS() + if err != nil { + return err + } + + s.netns = &sandboxNetNs{ + ns: netNS, + closed: false, + } + + if err := s.netns.symlinkCreate(s.name); err != nil { + logrus.Warnf("Could not create nentns symlink %v", err) + + if err := s.netns.ns.Close(); err != nil { + return err + } + + return err + } + + return nil +} + +func (s *sandbox) netNsRemove() error { + if s.netns == nil { + logrus.Warn("no networking namespace") + return nil + } + + s.netns.Lock() + defer s.netns.Unlock() + + if s.netns.closed { + // netNsRemove() can be called multiple + // times without returning an error. 
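+		// The first call already removed the symlink and closed the
+		// namespace, so there is nothing left to clean up here.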
+ return nil + } + + if err := s.netns.symlinkRemove(); err != nil { + return err + } + + if err := s.netns.ns.Close(); err != nil { + return err + } + + s.netns.closed = true + return nil +} + +func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + if namespace == "" { + namespace = podDefaultNamespace + } + + if name, err = s.reservePodName(id, fmt.Sprintf("%s-%s-%v", namespace, name, attempt)); err != nil { + return "", "", err + } + return id, name, err +} + +type podSandboxRequest interface { + GetPodSandboxId() string +} + +func (s *Server) getPodSandboxFromRequest(req podSandboxRequest) (*sandbox, error) { + sbID := req.GetPodSandboxId() + if sbID == "" { + return nil, errSandboxIDEmpty + } + + sandboxID, err := s.podIDIndex.Get(sbID) + if err != nil { + return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err) + } + + sb := s.getSandbox(sandboxID) + if sb == nil { + return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID) + } + return sb, nil +} diff --git a/manager/sandbox_list.go b/manager/sandbox_list.go new file mode 100644 index 00000000..3d8ae6a6 --- /dev/null +++ b/manager/sandbox_list.go @@ -0,0 +1,92 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + "k8s.io/kubernetes/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// filterSandbox returns whether passed container matches filtering criteria +func filterSandbox(p *pb.PodSandbox, filter *pb.PodSandboxFilter) bool { + if filter != nil { + if filter.State != nil { + if *p.State != *filter.State { + return false + } + } + if filter.LabelSelector != nil { + sel := fields.SelectorFromSet(filter.LabelSelector) + if !sel.Matches(fields.Set(p.Labels)) { + return false + } + } + } + return true +} + +// ListPodSandbox returns a list of SandBoxes. +func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (*pb.ListPodSandboxResponse, error) { + logrus.Debugf("ListPodSandboxRequest %+v", req) + var pods []*pb.PodSandbox + var podList []*sandbox + for _, sb := range s.state.sandboxes { + podList = append(podList, sb) + } + + filter := req.Filter + // Filter by pod id first. + if filter != nil { + if filter.Id != nil { + id, err := s.podIDIndex.Get(*filter.Id) + if err != nil { + return nil, err + } + sb := s.getSandbox(id) + if sb == nil { + podList = []*sandbox{} + } else { + podList = []*sandbox{sb} + } + } + } + + for _, sb := range podList { + podInfraContainer := sb.infraContainer + if podInfraContainer == nil { + // this can't really happen, but if it does because of a bug + // it's better not to panic + continue + } + if err := s.runtime.UpdateStatus(podInfraContainer); err != nil { + return nil, err + } + cState := s.runtime.ContainerStatus(podInfraContainer) + created := cState.Created.UnixNano() + rStatus := pb.PodSandboxState_SANDBOX_NOTREADY + if cState.Status == oci.ContainerStateRunning { + rStatus = pb.PodSandboxState_SANDBOX_READY + } + + pod := &pb.PodSandbox{ + Id: &sb.id, + CreatedAt: int64Ptr(created), + State: &rStatus, + Labels: sb.labels, + Annotations: sb.annotations, + Metadata: sb.metadata, + } + + // Filter by other criteria such as state and labels. 
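+		// As with ListContainers, ID filtering has already narrowed podList;
+		// filterSandbox only needs to check state and labels here.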
+ if filterSandbox(pod, req.Filter) { + pods = append(pods, pod) + } + } + + resp := &pb.ListPodSandboxResponse{ + Items: pods, + } + logrus.Debugf("ListPodSandboxResponse %+v", resp) + return resp, nil +} diff --git a/manager/sandbox_remove.go b/manager/sandbox_remove.go new file mode 100644 index 00000000..9064b716 --- /dev/null +++ b/manager/sandbox_remove.go @@ -0,0 +1,95 @@ +package server + +import ( + "fmt" + "os" + "path/filepath" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/runc/libcontainer/label" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// RemovePodSandbox deletes the sandbox. If there are any running containers in the +// sandbox, they should be force deleted. +func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (*pb.RemovePodSandboxResponse, error) { + logrus.Debugf("RemovePodSandboxRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req) + if err != nil { + if err == errSandboxIDEmpty { + return nil, err + } + + resp := &pb.RemovePodSandboxResponse{} + logrus.Warnf("could not get sandbox %s, it's probably been removed already: %v", req.GetPodSandboxId(), err) + return resp, nil + } + + podInfraContainer := sb.infraContainer + containers := sb.containers.List() + containers = append(containers, podInfraContainer) + + // Delete all the containers in the sandbox + for _, c := range containers { + if err := s.runtime.UpdateStatus(c); err != nil { + return nil, fmt.Errorf("failed to update container state: %v", err) + } + + cState := s.runtime.ContainerStatus(c) + if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { + if err := s.runtime.StopContainer(c); err != nil { + return nil, fmt.Errorf("failed to stop container %s: %v", c.Name(), err) + } + } + + if err := s.runtime.DeleteContainer(c); err != nil { + return nil, fmt.Errorf("failed to delete container %s in sandbox %s: %v", c.Name(), sb.id, err) + } + + if c == podInfraContainer { + continue + } + + containerDir := filepath.Join(s.runtime.ContainerDir(), c.ID()) + if err := os.RemoveAll(containerDir); err != nil { + return nil, fmt.Errorf("failed to remove container %s directory: %v", c.Name(), err) + } + + s.releaseContainerName(c.Name()) + s.removeContainer(c) + } + + if err := label.UnreserveLabel(sb.processLabel); err != nil { + return nil, err + } + + // unmount the shm for the pod + if sb.shmPath != "/dev/shm" { + if err := syscall.Unmount(sb.shmPath, syscall.MNT_DETACH); err != nil { + return nil, err + } + } + + if err := sb.netNsRemove(); err != nil { + return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err) + } + + // Remove the files related to the sandbox + podSandboxDir := filepath.Join(s.config.SandboxDir, sb.id) + if err := os.RemoveAll(podSandboxDir); err != nil { + return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err) + } + s.releaseContainerName(podInfraContainer.Name()) + s.removeContainer(podInfraContainer) + sb.infraContainer = nil + + s.releasePodName(sb.name) + s.removeSandbox(sb.id) + + resp := &pb.RemovePodSandboxResponse{} + logrus.Debugf("RemovePodSandboxResponse %+v", resp) + return resp, nil +} diff --git a/manager/sandbox_run.go b/manager/sandbox_run.go new file mode 100644 index 00000000..7c0d604c --- /dev/null +++ b/manager/sandbox_run.go @@ -0,0 +1,377 @@ +package server + +import ( + "encoding/json" + "fmt" + "os" + 
"path/filepath" + "strconv" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/utils" + "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runtime-tools/generate" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +func (s *Server) runContainer(container *oci.Container) error { + if err := s.runtime.CreateContainer(container); err != nil { + return err + } + + if err := s.runtime.UpdateStatus(container); err != nil { + return err + } + + if err := s.runtime.StartContainer(container); err != nil { + return err + } + + if err := s.runtime.UpdateStatus(container); err != nil { + return err + } + + return nil +} + +// RunPodSandbox creates and runs a pod-level sandbox. +func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { + logrus.Debugf("RunPodSandboxRequest %+v", req) + var processLabel, mountLabel, netNsPath string + // process req.Name + name := req.GetConfig().GetMetadata().GetName() + if name == "" { + return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty") + } + + namespace := req.GetConfig().GetMetadata().GetNamespace() + attempt := req.GetConfig().GetMetadata().GetAttempt() + + id, name, err := s.generatePodIDandName(name, namespace, attempt) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + s.releasePodName(name) + } + }() + + if err = s.podIDIndex.Add(id); err != nil { + return nil, err + } + + defer func() { + if err != nil { + if err = s.podIDIndex.Delete(id); err != nil { + logrus.Warnf("couldn't delete pod id %s from idIndex", id) + } + } + }() + + podSandboxDir := filepath.Join(s.config.SandboxDir, id) + if _, err = os.Stat(podSandboxDir); err == nil { + return nil, fmt.Errorf("pod sandbox (%s) already exists", podSandboxDir) + } + + defer func() { + if err != nil { + if err2 := os.RemoveAll(podSandboxDir); err2 != nil { + logrus.Warnf("couldn't cleanup podSandboxDir %s: %v", podSandboxDir, err2) + } + } + }() + + if err = os.MkdirAll(podSandboxDir, 0755); err != nil { + return nil, err + } + + // creates a spec Generator with the default spec. + g := generate.New() + + // TODO: Make the `graph/vfs` part of this configurable once the storage + // integration has been merged. 
+	podInfraRootfs := filepath.Join(s.config.Root, "graph/vfs/pause")
+	// setup defaults for the pod sandbox
+	g.SetRootPath(filepath.Join(podInfraRootfs, "rootfs"))
+	g.SetRootReadonly(true)
+	g.SetProcessArgs([]string{"/pause"})
+
+	// set hostname
+	hostname := req.GetConfig().GetHostname()
+	if hostname != "" {
+		g.SetHostname(hostname)
+	}
+
+	// set log directory
+	logDir := req.GetConfig().GetLogDirectory()
+	if logDir == "" {
+		logDir = filepath.Join(s.config.LogDir, id)
+	}
+
+	// set DNS options
+	dnsServers := req.GetConfig().GetDnsConfig().GetServers()
+	dnsSearches := req.GetConfig().GetDnsConfig().GetSearches()
+	dnsOptions := req.GetConfig().GetDnsConfig().GetOptions()
+	resolvPath := fmt.Sprintf("%s/resolv.conf", podSandboxDir)
+	err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath)
+	if err != nil {
+		if err1 := removeFile(resolvPath); err1 != nil {
+			return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1)
+		}
+		return nil, err
+	}
+
+	g.AddBindMount(resolvPath, "/etc/resolv.conf", []string{"ro"})
+
+	// add metadata
+	metadata := req.GetConfig().GetMetadata()
+	metadataJSON, err := json.Marshal(metadata)
+	if err != nil {
+		return nil, err
+	}
+
+	// add labels
+	labels := req.GetConfig().GetLabels()
+	labelsJSON, err := json.Marshal(labels)
+	if err != nil {
+		return nil, err
+	}
+
+	// add annotations
+	annotations := req.GetConfig().GetAnnotations()
+	annotationsJSON, err := json.Marshal(annotations)
+	if err != nil {
+		return nil, err
+	}
+
+	// Don't use SELinux separation with host PID or IPC namespaces.
+	if !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() && !req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
+		processLabel, mountLabel, err = getSELinuxLabels(nil)
+		if err != nil {
+			return nil, err
+		}
+		g.SetProcessSelinuxLabel(processLabel)
+	}
+
+	// create shm mount for the pod containers.
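+	// setupShm (below) mounts a tmpfs whose options look like
+	// "mode=1777,size=67108864"; the size shown assumes a 64 MiB
+	// defaultShmSize (the constant is defined elsewhere in the package).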
+	var shmPath string
+	if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
+		shmPath = "/dev/shm"
+	} else {
+		shmPath, err = setupShm(podSandboxDir, mountLabel)
+		if err != nil {
+			return nil, err
+		}
+		defer func() {
+			if err != nil {
+				if err2 := syscall.Unmount(shmPath, syscall.MNT_DETACH); err2 != nil {
+					logrus.Warnf("failed to unmount shm for pod: %v", err2)
+				}
+			}
+		}()
+	}
+
+	containerID, containerName, err := s.generateContainerIDandName(name, "infra", 0)
+	if err != nil {
+		return nil, err
+	}
+
+	defer func() {
+		if err != nil {
+			s.releaseContainerName(containerName)
+		}
+	}()
+
+	if err = s.ctrIDIndex.Add(containerID); err != nil {
+		return nil, err
+	}
+
+	defer func() {
+		if err != nil {
+			if err2 := s.ctrIDIndex.Delete(containerID); err2 != nil {
+				logrus.Warnf("couldn't delete ctr id %s from idIndex: %v", containerID, err2)
+			}
+		}
+	}()
+
+	g.AddAnnotation("ocid/metadata", string(metadataJSON))
+	g.AddAnnotation("ocid/labels", string(labelsJSON))
+	g.AddAnnotation("ocid/annotations", string(annotationsJSON))
+	g.AddAnnotation("ocid/log_path", logDir)
+	g.AddAnnotation("ocid/name", name)
+	g.AddAnnotation("ocid/container_type", containerTypeSandbox)
+	g.AddAnnotation("ocid/container_name", containerName)
+	g.AddAnnotation("ocid/container_id", containerID)
+	g.AddAnnotation("ocid/shm_path", shmPath)
+
+	sb := &sandbox{
+		id:           id,
+		name:         name,
+		logDir:       logDir,
+		labels:       labels,
+		annotations:  annotations,
+		containers:   oci.NewMemoryStore(),
+		processLabel: processLabel,
+		mountLabel:   mountLabel,
+		metadata:     metadata,
+		shmPath:      shmPath,
+	}
+
+	s.addSandbox(sb)
+
+	for k, v := range annotations {
+		g.AddAnnotation(k, v)
+	}
+
+	// extract linux sysctls from annotations and pass down to oci runtime
+	safe, unsafe, err := SysctlsFromPodAnnotations(annotations)
+	if err != nil {
+		return nil, err
+	}
+	for _, sysctl := range safe {
+		g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
+	}
+	for _, sysctl := range unsafe {
+		g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
+	}
+
+	// setup cgroup settings
+	cgroupParent := req.GetConfig().GetLinux().GetCgroupParent()
+	if cgroupParent != "" {
+		g.SetLinuxCgroupsPath(cgroupParent)
+	}
+
+	// set up namespaces
+	if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostNetwork() {
+		err = g.RemoveLinuxNamespace("network")
+		if err != nil {
+			return nil, err
+		}
+
+		netNsPath, err = hostNetNsPath()
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		// Create the sandbox network namespace
+		if err = sb.netNsCreate(); err != nil {
+			return nil, err
+		}
+
+		defer func() {
+			if err == nil {
+				return
+			}
+
+			if netnsErr := sb.netNsRemove(); netnsErr != nil {
+				logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
+			}
+		}()
+
+		// Pass the created namespace path to the runtime
+		err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath())
+		if err != nil {
+			return nil, err
+		}
+
+		netNsPath = sb.netNsPath()
+	}
+
+	if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
+		err = g.RemoveLinuxNamespace("pid")
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
+		err = g.RemoveLinuxNamespace("ipc")
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	err = g.SaveToFile(filepath.Join(podSandboxDir, "config.json"), generate.ExportOptions{})
+	if err != nil {
+		return nil, err
+	}
+
+	if _, err = os.Stat(podInfraRootfs); err != nil {
+		if os.IsNotExist(err) {
+			// TODO: Replace with the 
rootfs creation API when it is ready + if err = utils.CreateInfraRootfs(podInfraRootfs, s.config.Pause); err != nil { + return nil, err + } + } else { + return nil, err + } + } + + container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false) + if err != nil { + return nil, err + } + + sb.infraContainer = container + + // setup the network + podNamespace := "" + if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil { + return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err) + } + + if err = s.runContainer(container); err != nil { + return nil, err + } + + resp = &pb.RunPodSandboxResponse{PodSandboxId: &id} + logrus.Debugf("RunPodSandboxResponse: %+v", resp) + return resp, nil +} + +func getSELinuxLabels(selinuxOptions *pb.SELinuxOption) (processLabel string, mountLabel string, err error) { + processLabel = "" + if selinuxOptions != nil { + user := selinuxOptions.GetUser() + if user == "" { + return "", "", fmt.Errorf("SELinuxOption.User is empty") + } + + role := selinuxOptions.GetRole() + if role == "" { + return "", "", fmt.Errorf("SELinuxOption.Role is empty") + } + + t := selinuxOptions.GetType() + if t == "" { + return "", "", fmt.Errorf("SELinuxOption.Type is empty") + } + + level := selinuxOptions.GetLevel() + if level == "" { + return "", "", fmt.Errorf("SELinuxOption.Level is empty") + } + processLabel = fmt.Sprintf("%s:%s:%s:%s", user, role, t, level) + } + return label.InitLabels(label.DupSecOpt(processLabel)) +} + +func setupShm(podSandboxDir, mountLabel string) (shmPath string, err error) { + shmPath = filepath.Join(podSandboxDir, "shm") + if err = os.Mkdir(shmPath, 0700); err != nil { + return "", err + } + shmOptions := "mode=1777,size=" + strconv.Itoa(defaultShmSize) + if err = syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), + label.FormatMountLabel(shmOptions, mountLabel)); err != nil { + return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err) + } + return shmPath, nil +} diff --git a/manager/sandbox_status.go b/manager/sandbox_status.go new file mode 100644 index 00000000..d3826c3a --- /dev/null +++ b/manager/sandbox_status.go @@ -0,0 +1,62 @@ +package server + +import ( + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// PodSandboxStatus returns the Status of the PodSandbox. 
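+// The sandbox is reported as SANDBOX_READY only while its infra container is
+// running; every other container state maps to SANDBOX_NOTREADY, mirroring
+// ListPodSandbox.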
+func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (*pb.PodSandboxStatusResponse, error) { + logrus.Debugf("PodSandboxStatusRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req) + if err != nil { + return nil, err + } + + podInfraContainer := sb.infraContainer + if err = s.runtime.UpdateStatus(podInfraContainer); err != nil { + return nil, err + } + + cState := s.runtime.ContainerStatus(podInfraContainer) + created := cState.Created.UnixNano() + + netNsPath, err := podInfraContainer.NetNsPath() + if err != nil { + return nil, err + } + podNamespace := "" + ip, err := s.netPlugin.GetContainerNetworkStatus(netNsPath, podNamespace, sb.id, podInfraContainer.Name()) + if err != nil { + // ignore the error on network status + ip = "" + } + + rStatus := pb.PodSandboxState_SANDBOX_NOTREADY + if cState.Status == oci.ContainerStateRunning { + rStatus = pb.PodSandboxState_SANDBOX_READY + } + + sandboxID := sb.id + resp := &pb.PodSandboxStatusResponse{ + Status: &pb.PodSandboxStatus{ + Id: &sandboxID, + CreatedAt: int64Ptr(created), + Linux: &pb.LinuxPodSandboxStatus{ + Namespaces: &pb.Namespace{ + Network: sPtr(netNsPath), + }, + }, + Network: &pb.PodSandboxNetworkStatus{Ip: &ip}, + State: &rStatus, + Labels: sb.labels, + Annotations: sb.annotations, + Metadata: sb.metadata, + }, + } + + logrus.Infof("PodSandboxStatusResponse: %+v", resp) + return resp, nil +} diff --git a/manager/sandbox_stop.go b/manager/sandbox_stop.go new file mode 100644 index 00000000..47f570c2 --- /dev/null +++ b/manager/sandbox_stop.go @@ -0,0 +1,61 @@ +package server + +import ( + "fmt" + "os" + + "github.com/Sirupsen/logrus" + "github.com/kubernetes-incubator/cri-o/oci" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// StopPodSandbox stops the sandbox. If there are any running containers in the +// sandbox, they should be force terminated. +func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (*pb.StopPodSandboxResponse, error) { + logrus.Debugf("StopPodSandboxRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req) + if err != nil { + return nil, err + } + + podNamespace := "" + podInfraContainer := sb.infraContainer + netnsPath, err := podInfraContainer.NetNsPath() + if err != nil { + return nil, err + } + if _, err := os.Stat(netnsPath); err == nil { + if err2 := s.netPlugin.TearDownPod(netnsPath, podNamespace, sb.id, podInfraContainer.Name()); err2 != nil { + return nil, fmt.Errorf("failed to destroy network for container %s in sandbox %s: %v", + podInfraContainer.Name(), sb.id, err2) + } + } else if !os.IsNotExist(err) { // it's ok for netnsPath to *not* exist + return nil, fmt.Errorf("failed to stat netns path for container %s in sandbox %s before tearing down the network: %v", + podInfraContainer.Name(), sb.id, err) + } + + // Close the sandbox networking namespace. 
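+	// netNsRemove (sandbox.go) closes the persistent namespace and deletes
+	// its symlink; it should be a no-op for sandboxes that share the host
+	// network namespace.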
+ if err := sb.netNsRemove(); err != nil { + return nil, err + } + + containers := sb.containers.List() + containers = append(containers, podInfraContainer) + + for _, c := range containers { + if err := s.runtime.UpdateStatus(c); err != nil { + return nil, err + } + cStatus := s.runtime.ContainerStatus(c) + if cStatus.Status != oci.ContainerStateStopped { + if err := s.runtime.StopContainer(c); err != nil { + return nil, fmt.Errorf("failed to stop container %s in sandbox %s: %v", c.Name(), sb.id, err) + } + } + } + + resp := &pb.StopPodSandboxResponse{} + logrus.Debugf("StopPodSandboxResponse: %+v", resp) + return resp, nil +} diff --git a/manager/seccomp/seccomp.go b/manager/seccomp/seccomp.go new file mode 100644 index 00000000..2d4c9480 --- /dev/null +++ b/manager/seccomp/seccomp.go @@ -0,0 +1,149 @@ +// +build seccomp + +package seccomp + +import ( + "encoding/json" + "errors" + "fmt" + + "github.com/docker/docker/pkg/stringutils" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + libseccomp "github.com/seccomp/libseccomp-golang" +) + +// LoadProfileFromStruct takes a Seccomp struct and setup seccomp in the spec. +func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error { + return setupSeccomp(&config, specgen) +} + +// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile. +func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error { + var config Seccomp + if err := json.Unmarshal(body, &config); err != nil { + return fmt.Errorf("decoding seccomp profile failed: %v", err) + } + return setupSeccomp(&config, specgen) +} + +var nativeToSeccomp = map[string]Arch{ + "amd64": ArchX86_64, + "arm64": ArchAARCH64, + "mips64": ArchMIPS64, + "mips64n32": ArchMIPS64N32, + "mipsel64": ArchMIPSEL64, + "mipsel64n32": ArchMIPSEL64N32, + "s390x": ArchS390X, +} + +func setupSeccomp(config *Seccomp, specgen *generate.Generator) error { + if config == nil { + return nil + } + + // No default action specified, no syscalls listed, assume seccomp disabled + if config.DefaultAction == "" && len(config.Syscalls) == 0 { + return nil + } + + var arch string + var native, err = libseccomp.GetNativeArch() + if err == nil { + arch = native.String() + } + + if len(config.Architectures) != 0 && len(config.ArchMap) != 0 { + return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'") + } + + customspec := specgen.Spec() + customspec.Linux.Seccomp = &specs.Seccomp{} + + // if config.Architectures == 0 then libseccomp will figure out the architecture to use + if len(config.Architectures) != 0 { + for _, a := range config.Architectures { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a)) + } + } + + if len(config.ArchMap) != 0 { + for _, a := range config.ArchMap { + seccompArch, ok := nativeToSeccomp[arch] + if ok { + if a.Arch == seccompArch { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch)) + for _, sa := range a.SubArches { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa)) + } + break + } + } + } + } + + customspec.Linux.Seccomp.DefaultAction = specs.Action(config.DefaultAction) + +Loop: + // Loop through all syscall blocks and convert them to libcontainer format after filtering them + for _, call := range config.Syscalls { + if len(call.Excludes.Arches) > 0 { + if 
stringutils.InSlice(call.Excludes.Arches, arch) { + continue Loop + } + } + if len(call.Excludes.Caps) > 0 { + for _, c := range call.Excludes.Caps { + if stringutils.InSlice(customspec.Process.Capabilities, c) { + continue Loop + } + } + } + if len(call.Includes.Arches) > 0 { + if !stringutils.InSlice(call.Includes.Arches, arch) { + continue Loop + } + } + if len(call.Includes.Caps) > 0 { + for _, c := range call.Includes.Caps { + if !stringutils.InSlice(customspec.Process.Capabilities, c) { + continue Loop + } + } + } + + if call.Name != "" && len(call.Names) != 0 { + return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'") + } + + if call.Name != "" { + customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args)) + } + + for _, n := range call.Names { + customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args)) + } + } + + return nil +} + +func createSpecsSyscall(name string, action Action, args []*Arg) specs.Syscall { + newCall := specs.Syscall{ + Name: name, + Action: specs.Action(action), + } + + // Loop through all the arguments of the syscall and convert them + for _, arg := range args { + newArg := specs.Arg{ + Index: arg.Index, + Value: arg.Value, + ValueTwo: arg.ValueTwo, + Op: specs.Operator(arg.Op), + } + + newCall.Args = append(newCall.Args, newArg) + } + return newCall +} diff --git a/manager/seccomp/seccomp_unsupported.go b/manager/seccomp/seccomp_unsupported.go new file mode 100644 index 00000000..7ea2417f --- /dev/null +++ b/manager/seccomp/seccomp_unsupported.go @@ -0,0 +1,15 @@ +// +build !seccomp + +package seccomp + +import "github.com/opencontainers/runtime-tools/generate" + +// LoadProfileFromStruct takes a Seccomp struct and setup seccomp in the spec. +func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error { + return nil +} + +// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile. +func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error { + return nil +} diff --git a/manager/seccomp/types.go b/manager/seccomp/types.go new file mode 100644 index 00000000..5b07f8c0 --- /dev/null +++ b/manager/seccomp/types.go @@ -0,0 +1,93 @@ +package seccomp + +// Seccomp represents the config for a seccomp profile for syscall restriction. +type Seccomp struct { + DefaultAction Action `json:"defaultAction"` + // Architectures is kept to maintain backward compatibility with the old + // seccomp profile. 
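+	// An old-style profile sets it directly, e.g. (illustrative snippet):
+	//
+	//	{
+	//		"defaultAction": "SCMP_ACT_ERRNO",
+	//		"architectures": ["SCMP_ARCH_X86_64", "SCMP_ARCH_X86"],
+	//		"syscalls": [{"name": "accept", "action": "SCMP_ACT_ALLOW"}]
+	//	}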
+ Architectures []Arch `json:"architectures,omitempty"` + ArchMap []Architecture `json:"archMap,omitempty"` + Syscalls []*Syscall `json:"syscalls"` +} + +// Architecture is used to represent an specific architecture +// and its sub-architectures +type Architecture struct { + Arch Arch `json:"architecture"` + SubArches []Arch `json:"subArchitectures"` +} + +// Arch used for architectures +type Arch string + +// Additional architectures permitted to be used for system calls +// By default only the native architecture of the kernel is permitted +const ( + ArchX86 Arch = "SCMP_ARCH_X86" + ArchX86_64 Arch = "SCMP_ARCH_X86_64" + ArchX32 Arch = "SCMP_ARCH_X32" + ArchARM Arch = "SCMP_ARCH_ARM" + ArchAARCH64 Arch = "SCMP_ARCH_AARCH64" + ArchMIPS Arch = "SCMP_ARCH_MIPS" + ArchMIPS64 Arch = "SCMP_ARCH_MIPS64" + ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32" + ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL" + ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64" + ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32" + ArchPPC Arch = "SCMP_ARCH_PPC" + ArchPPC64 Arch = "SCMP_ARCH_PPC64" + ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" + ArchS390 Arch = "SCMP_ARCH_S390" + ArchS390X Arch = "SCMP_ARCH_S390X" +) + +// Action taken upon Seccomp rule match +type Action string + +// Define actions for Seccomp rules +const ( + ActKill Action = "SCMP_ACT_KILL" + ActTrap Action = "SCMP_ACT_TRAP" + ActErrno Action = "SCMP_ACT_ERRNO" + ActTrace Action = "SCMP_ACT_TRACE" + ActAllow Action = "SCMP_ACT_ALLOW" +) + +// Operator used to match syscall arguments in Seccomp +type Operator string + +// Define operators for syscall arguments in Seccomp +const ( + OpNotEqual Operator = "SCMP_CMP_NE" + OpLessThan Operator = "SCMP_CMP_LT" + OpLessEqual Operator = "SCMP_CMP_LE" + OpEqualTo Operator = "SCMP_CMP_EQ" + OpGreaterEqual Operator = "SCMP_CMP_GE" + OpGreaterThan Operator = "SCMP_CMP_GT" + OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ" +) + +// Arg used for matching specific syscall arguments in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"valueTwo"` + Op Operator `json:"op"` +} + +// Filter is used to conditionally apply Seccomp rules +type Filter struct { + Caps []string `json:"caps,omitempty"` + Arches []string `json:"arches,omitempty"` +} + +// Syscall is used to match a group of syscalls in Seccomp +type Syscall struct { + Name string `json:"name,omitempty"` + Names []string `json:"names,omitempty"` + Action Action `json:"action"` + Args []*Arg `json:"args"` + Comment string `json:"comment"` + Includes Filter `json:"includes"` + Excludes Filter `json:"excludes"` +} diff --git a/manager/server.go b/manager/server.go new file mode 100644 index 00000000..7d0490cd --- /dev/null +++ b/manager/server.go @@ -0,0 +1,419 @@ +package server + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "sync" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/registrar" + "github.com/docker/docker/pkg/truncindex" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/seccomp" + "github.com/kubernetes-incubator/cri-o/utils" + "github.com/opencontainers/runc/libcontainer/label" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/rajatchopra/ocicni" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +const ( + runtimeAPIVersion = "v1alpha1" +) + +// Server implements the RuntimeService and ImageService +type Server struct { + config Config 
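+	// runtime wraps the configured OCI runtime binary (e.g. runc).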
+ runtime *oci.Runtime + stateLock sync.Mutex + state *serverState + netPlugin ocicni.CNIPlugin + podNameIndex *registrar.Registrar + podIDIndex *truncindex.TruncIndex + ctrNameIndex *registrar.Registrar + ctrIDIndex *truncindex.TruncIndex + + seccompEnabled bool + seccompProfile seccomp.Seccomp + + appArmorEnabled bool + appArmorProfile string +} + +func (s *Server) loadContainer(id string) error { + config, err := ioutil.ReadFile(filepath.Join(s.runtime.ContainerDir(), id, "config.json")) + if err != nil { + return err + } + var m rspec.Spec + if err = json.Unmarshal(config, &m); err != nil { + return err + } + labels := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil { + return err + } + name := m.Annotations["ocid/name"] + name, err = s.reserveContainerName(id, name) + if err != nil { + return err + } + var metadata pb.ContainerMetadata + if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil { + return err + } + sb := s.getSandbox(m.Annotations["ocid/sandbox_id"]) + if sb == nil { + logrus.Warnf("could not get sandbox with id %s, skipping", m.Annotations["ocid/sandbox_id"]) + return nil + } + + var tty bool + if v := m.Annotations["ocid/tty"]; v == "true" { + tty = true + } + containerPath := filepath.Join(s.runtime.ContainerDir(), id) + + var img *pb.ImageSpec + image, ok := m.Annotations["ocid/image"] + if ok { + img = &pb.ImageSpec{ + Image: &image, + } + } + + annotations := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil { + return err + } + + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty) + if err != nil { + return err + } + s.addContainer(ctr) + if err = s.runtime.UpdateStatus(ctr); err != nil { + logrus.Warnf("error updating status for container %s: %v", ctr.ID(), err) + } + if err = s.ctrIDIndex.Add(id); err != nil { + return err + } + return nil +} + +func configNetNsPath(spec rspec.Spec) (string, error) { + for _, ns := range spec.Linux.Namespaces { + if ns.Type != rspec.NetworkNamespace { + continue + } + + if ns.Path == "" { + return "", fmt.Errorf("empty networking namespace") + } + + return ns.Path, nil + } + + return "", fmt.Errorf("missing networking namespace") +} + +func (s *Server) loadSandbox(id string) error { + config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json")) + if err != nil { + return err + } + var m rspec.Spec + if err = json.Unmarshal(config, &m); err != nil { + return err + } + labels := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil { + return err + } + name := m.Annotations["ocid/name"] + name, err = s.reservePodName(id, name) + if err != nil { + return err + } + var metadata pb.PodSandboxMetadata + if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil { + return err + } + + processLabel, mountLabel, err := label.InitLabels(label.DupSecOpt(m.Process.SelinuxLabel)) + if err != nil { + return err + } + + annotations := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil { + return err + } + + sb := &sandbox{ + id: id, + name: name, + logDir: m.Annotations["ocid/log_path"], + labels: labels, + containers: oci.NewMemoryStore(), + processLabel: processLabel, + mountLabel: mountLabel, + annotations: 
annotations,
+		metadata:     &metadata,
+		shmPath:      m.Annotations["ocid/shm_path"],
+	}
+
+	// We add a netNS only if we can load a permanent one.
+	// Otherwise, the sandbox will live in the host namespace.
+	netNsPath, err := configNetNsPath(m)
+	if err == nil {
+		netNS, nsErr := netNsGet(netNsPath, sb.name)
+		// If we can't load the networking namespace
+		// because it's closed, we just set the sb netns
+		// pointer to nil. Otherwise we return an error.
+		if nsErr != nil && nsErr != errSandboxClosedNetNS {
+			return nsErr
+		}
+
+		sb.netns = netNS
+	}
+
+	s.addSandbox(sb)
+
+	sandboxPath := filepath.Join(s.config.SandboxDir, id)
+
+	if err = label.ReserveLabel(processLabel); err != nil {
+		return err
+	}
+
+	cname, err := s.reserveContainerName(m.Annotations["ocid/container_id"], m.Annotations["ocid/container_name"])
+	if err != nil {
+		return err
+	}
+	scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
+	if err != nil {
+		return err
+	}
+	sb.infraContainer = scontainer
+	if err = s.runtime.UpdateStatus(scontainer); err != nil {
+		logrus.Warnf("error updating status for container %s: %v", scontainer.ID(), err)
+	}
+	if err = s.ctrIDIndex.Add(scontainer.ID()); err != nil {
+		return err
+	}
+	if err = s.podIDIndex.Add(id); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (s *Server) restore() {
+	sandboxDir, err := ioutil.ReadDir(s.config.SandboxDir)
+	if err != nil && !os.IsNotExist(err) {
+		logrus.Warnf("could not read sandbox directory %s: %v", s.config.SandboxDir, err)
+	}
+	for _, v := range sandboxDir {
+		if !v.IsDir() {
+			continue
+		}
+		if err = s.loadSandbox(v.Name()); err != nil {
+			logrus.Warnf("could not restore sandbox %s: %v", v.Name(), err)
+		}
+	}
+	containerDir, err := ioutil.ReadDir(s.runtime.ContainerDir())
+	if err != nil && !os.IsNotExist(err) {
+		logrus.Warnf("could not read container directory %s: %v", s.runtime.ContainerDir(), err)
+	}
+	for _, v := range containerDir {
+		if !v.IsDir() {
+			continue
+		}
+		if err := s.loadContainer(v.Name()); err != nil {
+			logrus.Warnf("could not restore container %s: %v", v.Name(), err)
+		}
+	}
+}
+
+func (s *Server) reservePodName(id, name string) (string, error) {
+	if err := s.podNameIndex.Reserve(name, id); err != nil {
+		if err == registrar.ErrNameReserved {
+			id, err := s.podNameIndex.Get(name)
+			if err != nil {
+				logrus.Warnf("conflict, pod name %q already reserved", name)
+				return "", err
+			}
+			return "", fmt.Errorf("conflict, name %q already reserved for pod %q", name, id)
+		}
+		return "", fmt.Errorf("error reserving pod name %q", name)
+	}
+	return name, nil
+}
+
+func (s *Server) releasePodName(name string) {
+	s.podNameIndex.Release(name)
+}
+
+func (s *Server) reserveContainerName(id, name string) (string, error) {
+	if err := s.ctrNameIndex.Reserve(name, id); err != nil {
+		if err == registrar.ErrNameReserved {
+			id, err := s.ctrNameIndex.Get(name)
+			if err != nil {
+				logrus.Warnf("conflict, ctr name %q already reserved", name)
+				return "", err
+			}
+			return "", fmt.Errorf("conflict, name %q already reserved for ctr %q", name, id)
+		}
+		return "", fmt.Errorf("error reserving ctr name %q", name)
+	}
+	return name, nil
+}
+
+func (s *Server) releaseContainerName(name string) {
+	s.ctrNameIndex.Release(name)
+}
+
+const (
+	// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
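+	// Its value, 2, mirrors SECCOMP_MODE_FILTER in <linux/seccomp.h>.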
+ SeccompModeFilter = uintptr(2) +) + +func seccompEnabled() bool { + var enabled bool + // Check if Seccomp is supported, via CONFIG_SECCOMP. + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL { + enabled = true + } + } + return enabled +} + +// New creates a new Server with options provided +func New(config *Config) (*Server, error) { + // TODO: This will go away later when we have wrapper process or systemd acting as + // subreaper. + if err := utils.SetSubreaper(1); err != nil { + return nil, fmt.Errorf("failed to set server as subreaper: %v", err) + } + + utils.StartReaper() + + if err := os.MkdirAll(config.ImageDir, 0755); err != nil { + return nil, err + } + + if err := os.MkdirAll(config.SandboxDir, 0755); err != nil { + return nil, err + } + + r, err := oci.New(config.Runtime, config.ContainerDir, config.Conmon, config.ConmonEnv) + if err != nil { + return nil, err + } + sandboxes := make(map[string]*sandbox) + containers := oci.NewMemoryStore() + netPlugin, err := ocicni.InitCNI("") + if err != nil { + return nil, err + } + s := &Server{ + runtime: r, + netPlugin: netPlugin, + config: *config, + state: &serverState{ + sandboxes: sandboxes, + containers: containers, + }, + seccompEnabled: seccompEnabled(), + appArmorEnabled: apparmor.IsEnabled(), + appArmorProfile: config.ApparmorProfile, + } + seccompProfile, err := ioutil.ReadFile(config.SeccompProfile) + if err != nil { + return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err) + } + var seccompConfig seccomp.Seccomp + if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil { + return nil, fmt.Errorf("decoding seccomp profile failed: %v", err) + } + s.seccompProfile = seccompConfig + + if s.appArmorEnabled && s.appArmorProfile == apparmor.DefaultApparmorProfile { + if err := apparmor.EnsureDefaultApparmorProfile(); err != nil { + return nil, fmt.Errorf("ensuring the default apparmor profile is installed failed: %v", err) + } + } + + s.podIDIndex = truncindex.NewTruncIndex([]string{}) + s.podNameIndex = registrar.NewRegistrar() + s.ctrIDIndex = truncindex.NewTruncIndex([]string{}) + s.ctrNameIndex = registrar.NewRegistrar() + + s.restore() + + logrus.Debugf("sandboxes: %v", s.state.sandboxes) + logrus.Debugf("containers: %v", s.state.containers) + return s, nil +} + +type serverState struct { + sandboxes map[string]*sandbox + containers oci.Store +} + +func (s *Server) addSandbox(sb *sandbox) { + s.stateLock.Lock() + s.state.sandboxes[sb.id] = sb + s.stateLock.Unlock() +} + +func (s *Server) getSandbox(id string) *sandbox { + s.stateLock.Lock() + sb := s.state.sandboxes[id] + s.stateLock.Unlock() + return sb +} + +func (s *Server) hasSandbox(id string) bool { + s.stateLock.Lock() + _, ok := s.state.sandboxes[id] + s.stateLock.Unlock() + return ok +} + +func (s *Server) removeSandbox(id string) { + s.stateLock.Lock() + delete(s.state.sandboxes, id) + s.stateLock.Unlock() +} + +func (s *Server) addContainer(c *oci.Container) { + s.stateLock.Lock() + sandbox := s.state.sandboxes[c.Sandbox()] + // TODO(runcom): handle !ok above!!! otherwise it panics! 
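+	// A minimal guard for the TODO above (sketch): without it, a container
+	// referencing an unknown sandbox yields a nil *sandbox and the method
+	// call below panics.
+	if sandbox == nil {
+		logrus.Warnf("addContainer: no sandbox found for container %s", c.ID())
+		s.stateLock.Unlock()
+		return
+	}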
+ sandbox.addContainer(c) + s.state.containers.Add(c.ID(), c) + s.stateLock.Unlock() +} + +func (s *Server) getContainer(id string) *oci.Container { + s.stateLock.Lock() + c := s.state.containers.Get(id) + s.stateLock.Unlock() + return c +} + +func (s *Server) removeContainer(c *oci.Container) { + s.stateLock.Lock() + sandbox := s.state.sandboxes[c.Sandbox()] + sandbox.removeContainer(c) + s.state.containers.Delete(c.ID()) + s.stateLock.Unlock() +} diff --git a/manager/utils.go b/manager/utils.go new file mode 100644 index 00000000..6b5c8e15 --- /dev/null +++ b/manager/utils.go @@ -0,0 +1,158 @@ +package server + +import ( + "fmt" + "io" + "os" + "strings" +) + +const ( + // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html: + // "The search list is currently limited to six domains with a total of 256 characters." + maxDNSSearches = 6 +) + +func int64Ptr(i int64) *int64 { + return &i +} + +func int32Ptr(i int32) *int32 { + return &i +} + +func sPtr(s string) *string { + return &s +} + +func copyFile(src, dest string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dest) + if err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, in) + return err +} + +func removeFile(path string) error { + if _, err := os.Stat(path); err == nil { + if err := os.Remove(path); err != nil { + return err + } + } + return nil +} + +func parseDNSOptions(servers, searches, options []string, path string) error { + nServers := len(servers) + nSearches := len(searches) + nOptions := len(options) + if nServers == 0 && nSearches == 0 && nOptions == 0 { + return copyFile("/etc/resolv.conf", path) + } + + if nSearches > maxDNSSearches { + return fmt.Errorf("DNSOption.Searches has more than 6 domains") + } + + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + if nSearches > 0 { + data := fmt.Sprintf("search %s\n", strings.Join(searches, " ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + if nServers > 0 { + data := fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + if nOptions > 0 { + data := fmt.Sprintf("options %s\n", strings.Join(options, " ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + return nil +} + +// TODO: remove sysctl extraction related code here, instead we import from k8s directly. + +const ( + // SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure + // container of a pod. The annotation value is a comma separated list of sysctl_name=value + // key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by + // the kubelet. Pods with other sysctls will fail to launch. + SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls" + + // UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure + // container of a pod. The annotation value is a comma separated list of sysctl_name=value + // key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly + // namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use + // is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet + // will fail to launch. 
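+	// A value might look like (illustrative):
+	//
+	//	security.alpha.kubernetes.io/unsafe-sysctls: "kernel.msgmax=65536,net.core.somaxconn=1024"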
+ UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls" +) + +// Sysctl defines a kernel parameter to be set +type Sysctl struct { + // Name of a property to set + Name string `json:"name"` + // Value of a property to set + Value string `json:"value"` +} + +// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls +// and a slice of unsafe Sysctls. This is only a convenience wrapper around +// SysctlsFromPodAnnotation. +func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) { + safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey]) + if err != nil { + return nil, nil, err + } + unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey]) + if err != nil { + return nil, nil, err + } + + return safe, unsafe, nil +} + +// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls. +func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) { + if len(annotation) == 0 { + return nil, nil + } + + kvs := strings.Split(annotation, ",") + sysctls := make([]Sysctl, len(kvs)) + for i, kv := range kvs { + cs := strings.Split(kv, "=") + if len(cs) != 2 || len(cs[0]) == 0 { + return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv) + } + sysctls[i].Name = cs[0] + sysctls[i].Value = cs[1] + } + return sysctls, nil +} diff --git a/manager/version.go b/manager/version.go new file mode 100644 index 00000000..56c990fd --- /dev/null +++ b/manager/version.go @@ -0,0 +1,29 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +// Version returns the runtime name, runtime version and runtime API version +func (s *Server) Version(ctx context.Context, req *pb.VersionRequest) (*pb.VersionResponse, error) { + + runtimeVersion, err := s.runtime.Version() + if err != nil { + return nil, err + } + + // TODO: Track upstream code. For now it expects 0.1.0 + version := "0.1.0" + + // taking const address + rav := runtimeAPIVersion + runtimeName := s.runtime.Name() + + return &pb.VersionResponse{ + Version: &version, + RuntimeName: &runtimeName, + RuntimeVersion: &runtimeVersion, + RuntimeApiVersion: &rav, + }, nil +}