From cc39203b0904c1ac03492782d8968917c94066b1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 29 Jan 2018 03:56:12 -0500 Subject: [PATCH] server: shuffle platform dependent operations --- server/container_attach.go | 4 +- server/container_create.go | 111 +----- server/container_create_linux.go | 118 ++++++ server/container_create_unsupported.go | 19 + server/sandbox_run.go | 504 +----------------------- server/sandbox_run_linux.go | 519 +++++++++++++++++++++++++ server/sandbox_run_unsupported.go | 14 + server/sandbox_stop.go | 113 +----- server/sandbox_stop_linux.go | 123 ++++++ server/sandbox_stop_unsupported.go | 14 + 10 files changed, 821 insertions(+), 718 deletions(-) create mode 100644 server/container_create_linux.go create mode 100644 server/container_create_unsupported.go create mode 100644 server/sandbox_run_linux.go create mode 100644 server/sandbox_run_unsupported.go create mode 100644 server/sandbox_stop_linux.go create mode 100644 server/sandbox_stop_unsupported.go diff --git a/server/container_attach.go b/server/container_attach.go index c4adec7d..0644e306 100644 --- a/server/container_attach.go +++ b/server/container_attach.go @@ -6,13 +6,13 @@ import ( "net" "os" "path/filepath" + "syscall" "time" "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/utils" "github.com/sirupsen/logrus" "golang.org/x/net/context" - "golang.org/x/sys/unix" "k8s.io/client-go/tools/remotecommand" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -60,7 +60,7 @@ func (ss streamService) Attach(containerID string, inputStream io.Reader, output } controlPath := filepath.Join(c.BundlePath(), "ctl") - controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) + controlFile, err := os.OpenFile(controlPath, syscall.O_WRONLY, 0) if err != nil { return fmt.Errorf("failed to open container ctl file: %v", err) } diff --git a/server/container_create.go 
b/server/container_create.go index 4f6004b2..f2d87f13 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "syscall" "time" dockermounts "github.com/docker/docker/pkg/mount" @@ -25,15 +26,12 @@ import ( "github.com/kubernetes-incubator/cri-o/server/apparmor" "github.com/kubernetes-incubator/cri-o/server/seccomp" "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/user" rspec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" "golang.org/x/net/context" - "golang.org/x/sys/unix" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" ) @@ -136,7 +134,7 @@ func addOCIBindMounts(mountLabel string, containerConfig *pb.ContainerConfig, sp if mount.SelinuxRelabel { // Need a way in kubernetes to determine if the volume is shared or private - if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP { + if err := label.Relabel(src, mountLabel, true); err != nil && err != syscall.ENOTSUP { return nil, nil, fmt.Errorf("relabel failed %s: %v", src, err) } } @@ -245,7 +243,7 @@ func addImageVolumes(rootfs string, s *Server, containerInfo *storage.ContainerI } // Label the source with the sandbox selinux mount label if mountLabel != "" { - if err1 := label.Relabel(src, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP { + if err1 := label.Relabel(src, mountLabel, true); err1 != nil && err1 != syscall.ENOTSUP { return nil, fmt.Errorf("relabel failed %s: %v", src, err1) } } @@ -280,99 +278,7 @@ func resolveSymbolicLink(path string) (string, error) { } func addDevices(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { - sp := specgen.Spec() - if 
containerConfig.GetLinux().GetSecurityContext().GetPrivileged() { - hostDevices, err := devices.HostDevices() - if err != nil { - return err - } - for _, hostDevice := range hostDevices { - rd := rspec.LinuxDevice{ - Path: hostDevice.Path, - Type: string(hostDevice.Type), - Major: hostDevice.Major, - Minor: hostDevice.Minor, - UID: &hostDevice.Uid, - GID: &hostDevice.Gid, - } - if hostDevice.Major == 0 && hostDevice.Minor == 0 { - // Invalid device, most likely a symbolic link, skip it. - continue - } - specgen.AddDevice(rd) - } - sp.Linux.Resources.Devices = []rspec.LinuxDeviceCgroup{ - { - Allow: true, - Access: "rwm", - }, - } - return nil - } - for _, device := range containerConfig.GetDevices() { - path, err := resolveSymbolicLink(device.HostPath) - if err != nil { - return err - } - dev, err := devices.DeviceFromPath(path, device.Permissions) - // if there was no error, return the device - if err == nil { - rd := rspec.LinuxDevice{ - Path: device.ContainerPath, - Type: string(dev.Type), - Major: dev.Major, - Minor: dev.Minor, - UID: &dev.Uid, - GID: &dev.Gid, - } - specgen.AddDevice(rd) - sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ - Allow: true, - Type: string(dev.Type), - Major: &dev.Major, - Minor: &dev.Minor, - Access: dev.Permissions, - }) - continue - } - // if the device is not a device node - // try to see if it's a directory holding many devices - if err == devices.ErrNotADevice { - - // check if it is a directory - if src, e := os.Stat(path); e == nil && src.IsDir() { - - // mount the internal devices recursively - filepath.Walk(path, func(dpath string, f os.FileInfo, e error) error { - childDevice, e := devices.DeviceFromPath(dpath, device.Permissions) - if e != nil { - // ignore the device - return nil - } - cPath := strings.Replace(dpath, path, device.ContainerPath, 1) - rd := rspec.LinuxDevice{ - Path: cPath, - Type: string(childDevice.Type), - Major: childDevice.Major, - Minor: childDevice.Minor, - 
UID: &childDevice.Uid, - GID: &childDevice.Gid, - } - specgen.AddDevice(rd) - sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ - Allow: true, - Type: string(childDevice.Type), - Major: &childDevice.Major, - Minor: &childDevice.Minor, - Access: childDevice.Permissions, - }) - - return nil - }) - } - } - } - return nil + return addDevicesPlatform(sb, containerConfig, specgen) } // buildOCIProcessArgs build an OCI compatible process arguments slice. @@ -1077,7 +983,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, options = []string{"ro"} } if sb.ResolvPath() != "" { - if err := label.Relabel(sb.ResolvPath(), mountLabel, true); err != nil && err != unix.ENOTSUP { + if err := label.Relabel(sb.ResolvPath(), mountLabel, true); err != nil && err != syscall.ENOTSUP { return nil, err } @@ -1092,7 +998,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } if sb.HostnamePath() != "" { - if err := label.Relabel(sb.HostnamePath(), mountLabel, true); err != nil && err != unix.ENOTSUP { + if err := label.Relabel(sb.HostnamePath(), mountLabel, true); err != nil && err != syscall.ENOTSUP { return nil, err } @@ -1285,8 +1191,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, containerID string, } // Set up pids limit if pids cgroup is mounted - _, err = cgroups.FindCgroupMountpoint("pids") - if err == nil { + if findCgroupMountpoint("pids") == nil { specgen.SetLinuxResourcesPidsLimit(s.config.PidsLimit) } @@ -1440,7 +1345,7 @@ func setupWorkingDirectory(rootfs, mountLabel, containerCwd string) error { return err } if mountLabel != "" { - if err1 := label.Relabel(fp, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP { + if err1 := label.Relabel(fp, mountLabel, true); err1 != nil && err1 != syscall.ENOTSUP { return fmt.Errorf("relabel failed %s: %v", fp, err1) } } diff --git a/server/container_create_linux.go b/server/container_create_linux.go new file mode 
100644 index 00000000..dc596105 --- /dev/null +++ b/server/container_create_linux.go @@ -0,0 +1,118 @@ +// +build linux + +package server + +import ( + "os" + "path/filepath" + "strings" + + "github.com/kubernetes-incubator/cri-o/lib/sandbox" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/devices" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +func findCgroupMountpoint(name string) error { + // Set up pids limit if pids cgroup is mounted + _, err := cgroups.FindCgroupMountpoint(name) + return err +} + +func addDevicesPlatform(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { + sp := specgen.Spec() + if containerConfig.GetLinux().GetSecurityContext().GetPrivileged() { + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + for _, hostDevice := range hostDevices { + rd := rspec.LinuxDevice{ + Path: hostDevice.Path, + Type: string(hostDevice.Type), + Major: hostDevice.Major, + Minor: hostDevice.Minor, + UID: &hostDevice.Uid, + GID: &hostDevice.Gid, + } + if hostDevice.Major == 0 && hostDevice.Minor == 0 { + // Invalid device, most likely a symbolic link, skip it. 
+ continue + } + specgen.AddDevice(rd) + } + sp.Linux.Resources.Devices = []rspec.LinuxDeviceCgroup{ + { + Allow: true, + Access: "rwm", + }, + } + return nil + } + for _, device := range containerConfig.GetDevices() { + path, err := resolveSymbolicLink(device.HostPath) + if err != nil { + return err + } + dev, err := devices.DeviceFromPath(path, device.Permissions) + // if there was no error, return the device + if err == nil { + rd := rspec.LinuxDevice{ + Path: device.ContainerPath, + Type: string(dev.Type), + Major: dev.Major, + Minor: dev.Minor, + UID: &dev.Uid, + GID: &dev.Gid, + } + specgen.AddDevice(rd) + sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ + Allow: true, + Type: string(dev.Type), + Major: &dev.Major, + Minor: &dev.Minor, + Access: dev.Permissions, + }) + continue + } + // if the device is not a device node + // try to see if it's a directory holding many devices + if err == devices.ErrNotADevice { + + // check if it is a directory + if src, e := os.Stat(path); e == nil && src.IsDir() { + + // mount the internal devices recursively + filepath.Walk(path, func(dpath string, f os.FileInfo, e error) error { + childDevice, e := devices.DeviceFromPath(dpath, device.Permissions) + if e != nil { + // ignore the device + return nil + } + cPath := strings.Replace(dpath, path, device.ContainerPath, 1) + rd := rspec.LinuxDevice{ + Path: cPath, + Type: string(childDevice.Type), + Major: childDevice.Major, + Minor: childDevice.Minor, + UID: &childDevice.Uid, + GID: &childDevice.Gid, + } + specgen.AddDevice(rd) + sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ + Allow: true, + Type: string(childDevice.Type), + Major: &childDevice.Major, + Minor: &childDevice.Minor, + Access: childDevice.Permissions, + }) + + return nil + }) + } + } + } + return nil +} diff --git a/server/container_create_unsupported.go b/server/container_create_unsupported.go new file mode 100644 index 
00000000..f1646c37 --- /dev/null +++ b/server/container_create_unsupported.go @@ -0,0 +1,19 @@ +// +build !linux + +package server + +import ( + "fmt" + + "github.com/kubernetes-incubator/cri-o/lib/sandbox" + "github.com/opencontainers/runtime-tools/generate" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +func findCgroupMountpoint(name string) error { + return fmt.Errorf("no cgroups on this platform") +} + +func addDevicesPlatform(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { + return nil +} diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 3bf4aea6..3416b882 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -1,33 +1,17 @@ package server import ( - "encoding/json" - "fmt" - "io/ioutil" "os" "path" - "path/filepath" "regexp" - "strconv" - "strings" - "time" - "github.com/containers/storage" - "github.com/kubernetes-incubator/cri-o/lib/sandbox" "github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/pkg/annotations" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/selinux/go-selinux/label" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "golang.org/x/net/context" - "golang.org/x/sys/unix" "k8s.io/api/core/v1" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" - "k8s.io/kubernetes/pkg/kubelet/leaky" "k8s.io/kubernetes/pkg/kubelet/network/hostport" - "k8s.io/kubernetes/pkg/kubelet/types" ) const ( @@ -92,479 +76,8 @@ var ( // RunPodSandbox creates and runs a pod-level sandbox. 
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { - const operation = "run_pod_sandbox" - defer func() { - recordOperation(operation, time.Now()) - recordError(operation, err) - }() - - s.updateLock.RLock() - defer s.updateLock.RUnlock() - - if req.GetConfig().GetMetadata() == nil { - return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Metadata is nil") - } - - logrus.Debugf("RunPodSandboxRequest %+v", req) - var processLabel, mountLabel, resolvPath string - // process req.Name - kubeName := req.GetConfig().GetMetadata().GetName() - if kubeName == "" { - return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty") - } - - namespace := req.GetConfig().GetMetadata().GetNamespace() - attempt := req.GetConfig().GetMetadata().GetAttempt() - - id, name, err := s.generatePodIDandName(req.GetConfig()) - if err != nil { - if strings.Contains(err.Error(), "already reserved for pod") { - matches := conflictRE.FindStringSubmatch(err.Error()) - if len(matches) != 2 { - return nil, err - } - dupID := matches[1] - if _, err := s.StopPodSandbox(ctx, &pb.StopPodSandboxRequest{PodSandboxId: dupID}); err != nil { - return nil, err - } - if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil { - return nil, err - } - id, name, err = s.generatePodIDandName(req.GetConfig()) - if err != nil { - return nil, err - } - } else { - return nil, err - } - } - - defer func() { - if err != nil { - s.ReleasePodName(name) - } - }() - - _, containerName, err := s.generateContainerIDandNameForSandbox(req.GetConfig()) - if err != nil { - return nil, err - } - - defer func() { - if err != nil { - s.ReleaseContainerName(containerName) - } - }() - - podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.ImageContext(), - name, id, - s.config.PauseImage, "", - containerName, - req.GetConfig().GetMetadata().GetName(), - req.GetConfig().GetMetadata().GetUid(), 
- namespace, - attempt, - nil) - if errors.Cause(err) == storage.ErrDuplicateName { - return nil, fmt.Errorf("pod sandbox with name %q already exists", name) - } - if err != nil { - return nil, fmt.Errorf("error creating pod sandbox with name %q: %v", name, err) - } - defer func() { - if err != nil { - if err2 := s.StorageRuntimeServer().RemovePodSandbox(id); err2 != nil { - logrus.Warnf("couldn't cleanup pod sandbox %q: %v", id, err2) - } - } - }() - - // TODO: factor generating/updating the spec into something other projects can vendor - - // creates a spec Generator with the default spec. - g := generate.New() - - // setup defaults for the pod sandbox - g.SetRootReadonly(true) - if s.config.PauseCommand == "" { - if podContainer.Config != nil { - g.SetProcessArgs(podContainer.Config.Config.Cmd) - } else { - g.SetProcessArgs([]string{sandbox.PodInfraCommand}) - } - } else { - g.SetProcessArgs([]string{s.config.PauseCommand}) - } - - // set DNS options - if req.GetConfig().GetDnsConfig() != nil { - dnsServers := req.GetConfig().GetDnsConfig().Servers - dnsSearches := req.GetConfig().GetDnsConfig().Searches - dnsOptions := req.GetConfig().GetDnsConfig().Options - resolvPath = fmt.Sprintf("%s/resolv.conf", podContainer.RunDir) - err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath) - if err != nil { - err1 := removeFile(resolvPath) - if err1 != nil { - err = err1 - return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1) - } - return nil, err - } - if err := label.Relabel(resolvPath, mountLabel, true); err != nil && err != unix.ENOTSUP { - return nil, err - } - mnt := runtimespec.Mount{ - Type: "bind", - Source: resolvPath, - Destination: "/etc/resolv.conf", - Options: []string{"ro", "bind"}, - } - g.AddMount(mnt) - } - - // add metadata - metadata := req.GetConfig().GetMetadata() - metadataJSON, err := json.Marshal(metadata) - if err != nil { - return nil, err - } - - // add labels - labels := req.GetConfig().GetLabels() - - if 
err := validateLabels(labels); err != nil { - return nil, err - } - - // Add special container name label for the infra container - labelsJSON := []byte{} - if labels != nil { - labels[types.KubernetesContainerNameLabel] = leaky.PodInfraContainerName - labelsJSON, err = json.Marshal(labels) - if err != nil { - return nil, err - } - } - - // add annotations - kubeAnnotations := req.GetConfig().GetAnnotations() - kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations) - if err != nil { - return nil, err - } - - // set log directory - logDir := req.GetConfig().GetLogDirectory() - if logDir == "" { - logDir = filepath.Join(s.config.LogDir, id) - } - if err = os.MkdirAll(logDir, 0700); err != nil { - return nil, err - } - // This should always be absolute from k8s. - if !filepath.IsAbs(logDir) { - return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir) - } - - privileged := s.privilegedSandbox(req) - - securityContext := req.GetConfig().GetLinux().GetSecurityContext() - if securityContext == nil { - logrus.Warn("no security context found in config.") - } - - nsOptsJSON, err := json.Marshal(securityContext.GetNamespaceOptions()) - if err != nil { - return nil, err - } - - processLabel, mountLabel, err = getSELinuxLabels(securityContext.GetSelinuxOptions(), privileged) - if err != nil { - return nil, err - } - - // Don't use SELinux separation with Host Pid or IPC Namespace or privileged. - if securityContext.GetNamespaceOptions().GetPid() == pb.NamespaceMode_NODE || - securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { - processLabel, mountLabel = "", "" - } - g.SetProcessSelinuxLabel(processLabel) - g.SetLinuxMountLabel(mountLabel) - - // create shm mount for the pod containers. 
- var shmPath string - if securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { - shmPath = "/dev/shm" - } else { - shmPath, err = setupShm(podContainer.RunDir, mountLabel) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - if err2 := unix.Unmount(shmPath, unix.MNT_DETACH); err2 != nil { - logrus.Warnf("failed to unmount shm for pod: %v", err2) - } - } - }() - } - - err = s.setPodSandboxMountLabel(id, mountLabel) - if err != nil { - return nil, err - } - - if err = s.CtrIDIndex().Add(id); err != nil { - return nil, err - } - - defer func() { - if err != nil { - if err2 := s.CtrIDIndex().Delete(id); err2 != nil { - logrus.Warnf("couldn't delete ctr id %s from idIndex", id) - } - } - }() - - // set log path inside log directory - logPath := filepath.Join(logDir, id+".log") - - // Handle https://issues.k8s.io/44043 - if err := ensureSaneLogPath(logPath); err != nil { - return nil, err - } - - hostNetwork := securityContext.GetNamespaceOptions().GetNetwork() == pb.NamespaceMode_NODE - - hostname, err := getHostname(id, req.GetConfig().Hostname, hostNetwork) - if err != nil { - return nil, err - } - g.SetHostname(hostname) - - trusted := s.trustedSandbox(req) - g.AddAnnotation(annotations.Metadata, string(metadataJSON)) - g.AddAnnotation(annotations.Labels, string(labelsJSON)) - g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) - g.AddAnnotation(annotations.LogPath, logPath) - g.AddAnnotation(annotations.Name, name) - g.AddAnnotation(annotations.Namespace, namespace) - g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox) - g.AddAnnotation(annotations.SandboxID, id) - g.AddAnnotation(annotations.ContainerName, containerName) - g.AddAnnotation(annotations.ContainerID, id) - g.AddAnnotation(annotations.ShmPath, shmPath) - g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged)) - g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted)) - 
g.AddAnnotation(annotations.ResolvPath, resolvPath) - g.AddAnnotation(annotations.HostName, hostname) - g.AddAnnotation(annotations.NamespaceOptions, string(nsOptsJSON)) - g.AddAnnotation(annotations.KubeName, kubeName) - if podContainer.Config.Config.StopSignal != "" { - // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57 - g.AddAnnotation("org.opencontainers.image.stopSignal", podContainer.Config.Config.StopSignal) - } - - created := time.Now() - g.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano)) - - portMappings := convertPortMappings(req.GetConfig().GetPortMappings()) - portMappingsJSON, err := json.Marshal(portMappings) - if err != nil { - return nil, err - } - g.AddAnnotation(annotations.PortMappings, string(portMappingsJSON)) - - // setup cgroup settings - cgroupParent := req.GetConfig().GetLinux().GetCgroupParent() - if cgroupParent != "" { - if s.config.CgroupManager == oci.SystemdCgroupsManager { - if len(cgroupParent) <= 6 || !strings.HasSuffix(path.Base(cgroupParent), ".slice") { - return nil, fmt.Errorf("cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s", cgroupParent) - } - cgPath, err := convertCgroupFsNameToSystemd(cgroupParent) - if err != nil { - return nil, err - } - g.SetLinuxCgroupsPath(cgPath + ":" + "crio" + ":" + id) - cgroupParent = cgPath - } else { - if strings.HasSuffix(path.Base(cgroupParent), ".slice") { - return nil, fmt.Errorf("cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s", cgroupParent) - } - cgPath := filepath.Join(cgroupParent, scopePrefix+"-"+id) - g.SetLinuxCgroupsPath(cgPath) - } - } - g.AddAnnotation(annotations.CgroupParent, cgroupParent) - - sb, err := sandbox.New(id, namespace, name, kubeName, logDir, labels, kubeAnnotations, processLabel, mountLabel, metadata, shmPath, cgroupParent, privileged, trusted, resolvPath, 
hostname, portMappings) - if err != nil { - return nil, err - } - - s.addSandbox(sb) - defer func() { - if err != nil { - s.removeSandbox(id) - } - }() - - if err = s.PodIDIndex().Add(id); err != nil { - return nil, err - } - - defer func() { - if err != nil { - if err := s.PodIDIndex().Delete(id); err != nil { - logrus.Warnf("couldn't delete pod id %s from idIndex", id) - } - } - }() - - for k, v := range kubeAnnotations { - g.AddAnnotation(k, v) - } - for k, v := range labels { - g.AddAnnotation(k, v) - } - - // extract linux sysctls from annotations and pass down to oci runtime - for key, value := range req.GetConfig().GetLinux().GetSysctls() { - g.AddLinuxSysctl(key, value) - } - - // Set OOM score adjust of the infra container to be very low - // so it doesn't get killed. - g.SetProcessOOMScoreAdj(PodInfraOOMAdj) - - g.SetLinuxResourcesCPUShares(PodInfraCPUshares) - - // set up namespaces - if hostNetwork { - err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace)) - if err != nil { - return nil, err - } - } else { - // Create the sandbox network namespace - if err = sb.NetNsCreate(); err != nil { - return nil, err - } - - defer func() { - if err == nil { - return - } - - if netnsErr := sb.NetNsRemove(); netnsErr != nil { - logrus.Warnf("Failed to remove networking namespace: %v", netnsErr) - } - }() - - // Pass the created namespace path to the runtime - err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath()) - if err != nil { - return nil, err - } - } - - if securityContext.GetNamespaceOptions().GetPid() == pb.NamespaceMode_NODE { - err = g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace)) - if err != nil { - return nil, err - } - } - - if securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { - err = g.RemoveLinuxNamespace(string(runtimespec.IPCNamespace)) - if err != nil { - return nil, err - } - } - - if !s.seccompEnabled { - g.Spec().Linux.Seccomp = nil - } - - saveOptions := 
generate.ExportOptions{} - mountPoint, err := s.StorageRuntimeServer().StartContainer(id) - if err != nil { - return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.Name(), id, err) - } - g.AddAnnotation(annotations.MountPoint, mountPoint) - g.SetRootPath(mountPoint) - - hostnamePath := fmt.Sprintf("%s/hostname", podContainer.RunDir) - if err := ioutil.WriteFile(hostnamePath, []byte(hostname+"\n"), 0644); err != nil { - return nil, err - } - if err := label.Relabel(hostnamePath, mountLabel, true); err != nil && err != unix.ENOTSUP { - return nil, err - } - mnt := runtimespec.Mount{ - Type: "bind", - Source: hostnamePath, - Destination: "/etc/hostname", - Options: []string{"ro", "bind"}, - } - g.AddMount(mnt) - g.AddAnnotation(annotations.HostnamePath, hostnamePath) - sb.AddHostnamePath(hostnamePath) - - container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs().Path(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal) - if err != nil { - return nil, err - } - container.SetSpec(g.Spec()) - container.SetMountPoint(mountPoint) - - sb.SetInfraContainer(container) - - var ip string - ip, err = s.networkStart(hostNetwork, sb) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - s.networkStop(hostNetwork, sb) - } - }() - - g.AddAnnotation(annotations.IP, ip) - sb.AddIP(ip) - sb.SetNamespaceOptions(securityContext.GetNamespaceOptions()) - - spp := req.GetConfig().GetLinux().GetSecurityContext().GetSeccompProfilePath() - g.AddAnnotation(annotations.SeccompProfilePath, spp) - sb.SetSeccompProfilePath(spp) - if !privileged { - if err = s.setupSeccomp(&g, spp); err != nil { - return nil, err - } - } - - err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions) - if err != nil { - return nil, fmt.Errorf("failed to save 
template configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) - } - if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil { - return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) - } - - if err = s.runContainer(container, sb.CgroupParent()); err != nil { - return nil, err - } - - s.addInfraContainer(container) - - s.ContainerStateToDisk(container) - - resp = &pb.RunPodSandboxResponse{PodSandboxId: id} - logrus.Debugf("RunPodSandboxResponse: %+v", resp) - return resp, nil + // platform dependent call + return s.runPodSandbox(ctx, req) } func convertPortMappings(in []*pb.PortMapping) []*hostport.PortMapping { @@ -631,19 +144,6 @@ func getSELinuxLabels(selinuxOptions *pb.SELinuxOption, privileged bool) (proces return label.InitLabels(labels) } -func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) { - shmPath = filepath.Join(podSandboxRunDir, "shm") - if err = os.Mkdir(shmPath, 0700); err != nil { - return "", err - } - shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize) - if err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, - label.FormatMountLabel(shmOptions, mountLabel)); err != nil { - return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err) - } - return shmPath, nil -} - // convertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name. 
// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice // NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those diff --git a/server/sandbox_run_linux.go b/server/sandbox_run_linux.go new file mode 100644 index 00000000..72ea84a3 --- /dev/null +++ b/server/sandbox_run_linux.go @@ -0,0 +1,519 @@ +// +build linux + +package server + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/containers/storage" + "github.com/kubernetes-incubator/cri-o/lib/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/annotations" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "golang.org/x/sys/unix" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" + "k8s.io/kubernetes/pkg/kubelet/leaky" + "k8s.io/kubernetes/pkg/kubelet/types" +) + +func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { + const operation = "run_pod_sandbox" + defer func() { + recordOperation(operation, time.Now()) + recordError(operation, err) + }() + + s.updateLock.RLock() + defer s.updateLock.RUnlock() + + if req.GetConfig().GetMetadata() == nil { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Metadata is nil") + } + + logrus.Debugf("RunPodSandboxRequest %+v", req) + var processLabel, mountLabel, resolvPath string + // process req.Name + kubeName := req.GetConfig().GetMetadata().GetName() + if kubeName == "" { + return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty") + } + + namespace := req.GetConfig().GetMetadata().GetNamespace() + attempt := 
req.GetConfig().GetMetadata().GetAttempt() + + id, name, err := s.generatePodIDandName(req.GetConfig()) + if err != nil { + if strings.Contains(err.Error(), "already reserved for pod") { + matches := conflictRE.FindStringSubmatch(err.Error()) + if len(matches) != 2 { + return nil, err + } + dupID := matches[1] + if _, err := s.StopPodSandbox(ctx, &pb.StopPodSandboxRequest{PodSandboxId: dupID}); err != nil { + return nil, err + } + if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil { + return nil, err + } + id, name, err = s.generatePodIDandName(req.GetConfig()) + if err != nil { + return nil, err + } + } else { + return nil, err + } + } + + defer func() { + if err != nil { + s.ReleasePodName(name) + } + }() + + _, containerName, err := s.generateContainerIDandNameForSandbox(req.GetConfig()) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + s.ReleaseContainerName(containerName) + } + }() + + podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.ImageContext(), + name, id, + s.config.PauseImage, "", + containerName, + req.GetConfig().GetMetadata().GetName(), + req.GetConfig().GetMetadata().GetUid(), + namespace, + attempt, + nil) + if errors.Cause(err) == storage.ErrDuplicateName { + return nil, fmt.Errorf("pod sandbox with name %q already exists", name) + } + if err != nil { + return nil, fmt.Errorf("error creating pod sandbox with name %q: %v", name, err) + } + defer func() { + if err != nil { + if err2 := s.StorageRuntimeServer().RemovePodSandbox(id); err2 != nil { + logrus.Warnf("couldn't cleanup pod sandbox %q: %v", id, err2) + } + } + }() + + // TODO: factor generating/updating the spec into something other projects can vendor + + // creates a spec Generator with the default spec. 
+ g := generate.New() + + // setup defaults for the pod sandbox + g.SetRootReadonly(true) + if s.config.PauseCommand == "" { + if podContainer.Config != nil { + g.SetProcessArgs(podContainer.Config.Config.Cmd) + } else { + g.SetProcessArgs([]string{sandbox.PodInfraCommand}) + } + } else { + g.SetProcessArgs([]string{s.config.PauseCommand}) + } + + // set DNS options + if req.GetConfig().GetDnsConfig() != nil { + dnsServers := req.GetConfig().GetDnsConfig().Servers + dnsSearches := req.GetConfig().GetDnsConfig().Searches + dnsOptions := req.GetConfig().GetDnsConfig().Options + resolvPath = fmt.Sprintf("%s/resolv.conf", podContainer.RunDir) + err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath) + if err != nil { + err1 := removeFile(resolvPath) + if err1 != nil { + err = err1 + return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1) + } + return nil, err + } + if err := label.Relabel(resolvPath, mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + mnt := runtimespec.Mount{ + Type: "bind", + Source: resolvPath, + Destination: "/etc/resolv.conf", + Options: []string{"ro", "bind"}, + } + g.AddMount(mnt) + } + + // add metadata + metadata := req.GetConfig().GetMetadata() + metadataJSON, err := json.Marshal(metadata) + if err != nil { + return nil, err + } + + // add labels + labels := req.GetConfig().GetLabels() + + if err := validateLabels(labels); err != nil { + return nil, err + } + + // Add special container name label for the infra container + labelsJSON := []byte{} + if labels != nil { + labels[types.KubernetesContainerNameLabel] = leaky.PodInfraContainerName + labelsJSON, err = json.Marshal(labels) + if err != nil { + return nil, err + } + } + + // add annotations + kubeAnnotations := req.GetConfig().GetAnnotations() + kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations) + if err != nil { + return nil, err + } + + // set log directory + logDir := req.GetConfig().GetLogDirectory() + if logDir == 
"" { + logDir = filepath.Join(s.config.LogDir, id) + } + if err = os.MkdirAll(logDir, 0700); err != nil { + return nil, err + } + // This should always be absolute from k8s. + if !filepath.IsAbs(logDir) { + return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir) + } + + privileged := s.privilegedSandbox(req) + + securityContext := req.GetConfig().GetLinux().GetSecurityContext() + if securityContext == nil { + logrus.Warn("no security context found in config.") + } + + nsOptsJSON, err := json.Marshal(securityContext.GetNamespaceOptions()) + if err != nil { + return nil, err + } + + processLabel, mountLabel, err = getSELinuxLabels(securityContext.GetSelinuxOptions(), privileged) + if err != nil { + return nil, err + } + + // Don't use SELinux separation with Host Pid or IPC Namespace or privileged. + if securityContext.GetNamespaceOptions().GetPid() == pb.NamespaceMode_NODE || + securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { + processLabel, mountLabel = "", "" + } + g.SetProcessSelinuxLabel(processLabel) + g.SetLinuxMountLabel(mountLabel) + + // create shm mount for the pod containers. 
+ var shmPath string + if securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { + shmPath = "/dev/shm" + } else { + shmPath, err = setupShm(podContainer.RunDir, mountLabel) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + if err2 := unix.Unmount(shmPath, unix.MNT_DETACH); err2 != nil { + logrus.Warnf("failed to unmount shm for pod: %v", err2) + } + } + }() + } + + err = s.setPodSandboxMountLabel(id, mountLabel) + if err != nil { + return nil, err + } + + if err = s.CtrIDIndex().Add(id); err != nil { + return nil, err + } + + defer func() { + if err != nil { + if err2 := s.CtrIDIndex().Delete(id); err2 != nil { + logrus.Warnf("couldn't delete ctr id %s from idIndex", id) + } + } + }() + + // set log path inside log directory + logPath := filepath.Join(logDir, id+".log") + + // Handle https://issues.k8s.io/44043 + if err := ensureSaneLogPath(logPath); err != nil { + return nil, err + } + + hostNetwork := securityContext.GetNamespaceOptions().GetNetwork() == pb.NamespaceMode_NODE + + hostname, err := getHostname(id, req.GetConfig().Hostname, hostNetwork) + if err != nil { + return nil, err + } + g.SetHostname(hostname) + + trusted := s.trustedSandbox(req) + g.AddAnnotation(annotations.Metadata, string(metadataJSON)) + g.AddAnnotation(annotations.Labels, string(labelsJSON)) + g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) + g.AddAnnotation(annotations.LogPath, logPath) + g.AddAnnotation(annotations.Name, name) + g.AddAnnotation(annotations.Namespace, namespace) + g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox) + g.AddAnnotation(annotations.SandboxID, id) + g.AddAnnotation(annotations.ContainerName, containerName) + g.AddAnnotation(annotations.ContainerID, id) + g.AddAnnotation(annotations.ShmPath, shmPath) + g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged)) + g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted)) + 
g.AddAnnotation(annotations.ResolvPath, resolvPath) + g.AddAnnotation(annotations.HostName, hostname) + g.AddAnnotation(annotations.NamespaceOptions, string(nsOptsJSON)) + g.AddAnnotation(annotations.KubeName, kubeName) + if podContainer.Config.Config.StopSignal != "" { + // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57 + g.AddAnnotation("org.opencontainers.image.stopSignal", podContainer.Config.Config.StopSignal) + } + + created := time.Now() + g.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano)) + + portMappings := convertPortMappings(req.GetConfig().GetPortMappings()) + portMappingsJSON, err := json.Marshal(portMappings) + if err != nil { + return nil, err + } + g.AddAnnotation(annotations.PortMappings, string(portMappingsJSON)) + + // setup cgroup settings + cgroupParent := req.GetConfig().GetLinux().GetCgroupParent() + if cgroupParent != "" { + if s.config.CgroupManager == oci.SystemdCgroupsManager { + if len(cgroupParent) <= 6 || !strings.HasSuffix(path.Base(cgroupParent), ".slice") { + return nil, fmt.Errorf("cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s", cgroupParent) + } + cgPath, err := convertCgroupFsNameToSystemd(cgroupParent) + if err != nil { + return nil, err + } + g.SetLinuxCgroupsPath(cgPath + ":" + "crio" + ":" + id) + cgroupParent = cgPath + } else { + if strings.HasSuffix(path.Base(cgroupParent), ".slice") { + return nil, fmt.Errorf("cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s", cgroupParent) + } + cgPath := filepath.Join(cgroupParent, scopePrefix+"-"+id) + g.SetLinuxCgroupsPath(cgPath) + } + } + g.AddAnnotation(annotations.CgroupParent, cgroupParent) + + sb, err := sandbox.New(id, namespace, name, kubeName, logDir, labels, kubeAnnotations, processLabel, mountLabel, metadata, shmPath, cgroupParent, privileged, trusted, resolvPath, 
hostname, portMappings) + if err != nil { + return nil, err + } + + s.addSandbox(sb) + defer func() { + if err != nil { + s.removeSandbox(id) + } + }() + + if err = s.PodIDIndex().Add(id); err != nil { + return nil, err + } + + defer func() { + if err != nil { + if err := s.PodIDIndex().Delete(id); err != nil { + logrus.Warnf("couldn't delete pod id %s from idIndex", id) + } + } + }() + + for k, v := range kubeAnnotations { + g.AddAnnotation(k, v) + } + for k, v := range labels { + g.AddAnnotation(k, v) + } + + // extract linux sysctls from annotations and pass down to oci runtime + for key, value := range req.GetConfig().GetLinux().GetSysctls() { + g.AddLinuxSysctl(key, value) + } + + // Set OOM score adjust of the infra container to be very low + // so it doesn't get killed. + g.SetProcessOOMScoreAdj(PodInfraOOMAdj) + + g.SetLinuxResourcesCPUShares(PodInfraCPUshares) + + // set up namespaces + if hostNetwork { + err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace)) + if err != nil { + return nil, err + } + } else { + // Create the sandbox network namespace + if err = sb.NetNsCreate(); err != nil { + return nil, err + } + + defer func() { + if err == nil { + return + } + + if netnsErr := sb.NetNsRemove(); netnsErr != nil { + logrus.Warnf("Failed to remove networking namespace: %v", netnsErr) + } + }() + + // Pass the created namespace path to the runtime + err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath()) + if err != nil { + return nil, err + } + } + + if securityContext.GetNamespaceOptions().GetPid() == pb.NamespaceMode_NODE { + err = g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace)) + if err != nil { + return nil, err + } + } + + if securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { + err = g.RemoveLinuxNamespace(string(runtimespec.IPCNamespace)) + if err != nil { + return nil, err + } + } + + if !s.seccompEnabled { + g.Spec().Linux.Seccomp = nil + } + + saveOptions := 
generate.ExportOptions{} + mountPoint, err := s.StorageRuntimeServer().StartContainer(id) + if err != nil { + return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.Name(), id, err) + } + g.AddAnnotation(annotations.MountPoint, mountPoint) + g.SetRootPath(mountPoint) + + hostnamePath := fmt.Sprintf("%s/hostname", podContainer.RunDir) + if err := ioutil.WriteFile(hostnamePath, []byte(hostname+"\n"), 0644); err != nil { + return nil, err + } + if err := label.Relabel(hostnamePath, mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + mnt := runtimespec.Mount{ + Type: "bind", + Source: hostnamePath, + Destination: "/etc/hostname", + Options: []string{"ro", "bind"}, + } + g.AddMount(mnt) + g.AddAnnotation(annotations.HostnamePath, hostnamePath) + sb.AddHostnamePath(hostnamePath) + + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs().Path(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal) + if err != nil { + return nil, err + } + container.SetSpec(g.Spec()) + container.SetMountPoint(mountPoint) + + sb.SetInfraContainer(container) + + var ip string + ip, err = s.networkStart(hostNetwork, sb) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + s.networkStop(hostNetwork, sb) + } + }() + + g.AddAnnotation(annotations.IP, ip) + sb.AddIP(ip) + sb.SetNamespaceOptions(securityContext.GetNamespaceOptions()) + + spp := req.GetConfig().GetLinux().GetSecurityContext().GetSeccompProfilePath() + g.AddAnnotation(annotations.SeccompProfilePath, spp) + sb.SetSeccompProfilePath(spp) + if !privileged { + if err = s.setupSeccomp(&g, spp); err != nil { + return nil, err + } + } + + err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions) + if err != nil { + return nil, fmt.Errorf("failed to save 
template configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) + } + if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil { + return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) + } + + if err = s.runContainer(container, sb.CgroupParent()); err != nil { + return nil, err + } + + s.addInfraContainer(container) + + s.ContainerStateToDisk(container) + + resp = &pb.RunPodSandboxResponse{PodSandboxId: id} + logrus.Debugf("RunPodSandboxResponse: %+v", resp) + return resp, nil +} + +func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) { + shmPath = filepath.Join(podSandboxRunDir, "shm") + if err = os.Mkdir(shmPath, 0700); err != nil { + return "", err + } + shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize) + if err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, + label.FormatMountLabel(shmOptions, mountLabel)); err != nil { + return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err) + } + return shmPath, nil +} diff --git a/server/sandbox_run_unsupported.go b/server/sandbox_run_unsupported.go new file mode 100644 index 00000000..494a7407 --- /dev/null +++ b/server/sandbox_run_unsupported.go @@ -0,0 +1,14 @@ +// +build !linux + +package server + +import ( + "fmt" + + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { + return nil, fmt.Errorf("unsupported") +} diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index aa3d883f..bf8995ec 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -1,125 +1,16 @@ package server import ( - "fmt" - "time" - - "github.com/containers/storage" - "github.com/docker/docker/pkg/mount" - "github.com/docker/docker/pkg/symlink" - 
"github.com/kubernetes-incubator/cri-o/lib/sandbox" - "github.com/kubernetes-incubator/cri-o/oci" - "github.com/opencontainers/selinux/go-selinux/label" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/net/context" - "golang.org/x/sys/unix" pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" ) // StopPodSandbox stops the sandbox. If there are any running containers in the // sandbox, they should be force terminated. func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (resp *pb.StopPodSandboxResponse, err error) { - const operation = "stop_pod_sandbox" - defer func() { - recordOperation(operation, time.Now()) - recordError(operation, err) - }() - - logrus.Debugf("StopPodSandboxRequest %+v", req) - sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) - if err != nil { - if err == sandbox.ErrIDEmpty { - return nil, err - } - - // If the sandbox isn't found we just return an empty response to adhere - // the the CRI interface which expects to not error out in not found - // cases. 
- - resp = &pb.StopPodSandboxResponse{} - logrus.Warnf("could not get sandbox %s, it's probably been stopped already: %v", req.PodSandboxId, err) - logrus.Debugf("StopPodSandboxResponse %s: %+v", req.PodSandboxId, resp) - return resp, nil - } - - if sb.Stopped() { - resp = &pb.StopPodSandboxResponse{} - logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) - return resp, nil - } - - podInfraContainer := sb.InfraContainer() - containers := sb.Containers().List() - containers = append(containers, podInfraContainer) - - for _, c := range containers { - cStatus := s.Runtime().ContainerStatus(c) - if cStatus.Status != oci.ContainerStateStopped { - if c.ID() == podInfraContainer.ID() { - continue - } - timeout := int64(10) - if err := s.Runtime().StopContainer(ctx, c, timeout); err != nil { - return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) - } - if err := s.Runtime().WaitContainerStateStopped(ctx, c, timeout); err != nil { - return nil, fmt.Errorf("failed to get container 'stopped' status %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) - } - if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { - // assume container already umounted - logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) - } - } - s.ContainerStateToDisk(c) - } - - // Clean up sandbox networking and close its network namespace. 
- hostNetwork := sb.NetNsPath() == "" - s.networkStop(hostNetwork, sb) - podInfraStatus := s.Runtime().ContainerStatus(podInfraContainer) - if podInfraStatus.Status != oci.ContainerStateStopped { - timeout := int64(10) - if err := s.Runtime().StopContainer(ctx, podInfraContainer, timeout); err != nil { - return nil, fmt.Errorf("failed to stop infra container %s in pod sandbox %s: %v", podInfraContainer.Name(), sb.ID(), err) - } - if err := s.Runtime().WaitContainerStateStopped(ctx, podInfraContainer, timeout); err != nil { - return nil, fmt.Errorf("failed to get infra container 'stopped' status %s in pod sandbox %s: %v", podInfraContainer.Name(), sb.ID(), err) - } - } - if err := sb.NetNsRemove(); err != nil { - return nil, err - } - - if err := label.ReleaseLabel(sb.ProcessLabel()); err != nil { - return nil, err - } - - // unmount the shm for the pod - if sb.ShmPath() != "/dev/shm" { - // we got namespaces in the form of - // /var/run/containers/storage/overlay-containers/CID/userdata/shm - // but /var/run on most system is symlinked to /run so we first resolve - // the symlink and then try and see if it's mounted - fp, err := symlink.FollowSymlinkInScope(sb.ShmPath(), "/") - if err != nil { - return nil, err - } - if mounted, err := mount.Mounted(fp); err == nil && mounted { - if err := unix.Unmount(fp, unix.MNT_DETACH); err != nil { - return nil, err - } - } - } - - if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { - logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err) - } - - sb.SetStopped() - resp = &pb.StopPodSandboxResponse{} - logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) - return resp, nil + // platform dependent call + return s.stopPodSandbox(ctx, req) } // StopAllPodSandboxes removes all pod sandboxes diff --git a/server/sandbox_stop_linux.go b/server/sandbox_stop_linux.go new file mode 100644 index 00000000..919ce9ac --- /dev/null 
+++ b/server/sandbox_stop_linux.go @@ -0,0 +1,123 @@ +// +build linux + +package server + +import ( + "fmt" + "time" + + "github.com/containers/storage" + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/symlink" + "github.com/kubernetes-incubator/cri-o/lib/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "golang.org/x/sys/unix" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +func (s *Server) stopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (resp *pb.StopPodSandboxResponse, err error) { + const operation = "stop_pod_sandbox" + defer func() { + recordOperation(operation, time.Now()) + recordError(operation, err) + }() + + logrus.Debugf("StopPodSandboxRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) + if err != nil { + if err == sandbox.ErrIDEmpty { + return nil, err + } + + // If the sandbox isn't found we just return an empty response to adhere + // to the CRI interface which expects to not error out in not found + // cases.
+ + resp = &pb.StopPodSandboxResponse{} + logrus.Warnf("could not get sandbox %s, it's probably been stopped already: %v", req.PodSandboxId, err) + logrus.Debugf("StopPodSandboxResponse %s: %+v", req.PodSandboxId, resp) + return resp, nil + } + + if sb.Stopped() { + resp = &pb.StopPodSandboxResponse{} + logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) + return resp, nil + } + + podInfraContainer := sb.InfraContainer() + containers := sb.Containers().List() + containers = append(containers, podInfraContainer) + + for _, c := range containers { + cStatus := s.Runtime().ContainerStatus(c) + if cStatus.Status != oci.ContainerStateStopped { + if c.ID() == podInfraContainer.ID() { + continue + } + timeout := int64(10) + if err := s.Runtime().StopContainer(ctx, c, timeout); err != nil { + return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + if err := s.Runtime().WaitContainerStateStopped(ctx, c, timeout); err != nil { + return nil, fmt.Errorf("failed to get container 'stopped' status %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { + // assume container already umounted + logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + } + s.ContainerStateToDisk(c) + } + + // Clean up sandbox networking and close its network namespace. 
+ hostNetwork := sb.NetNsPath() == "" + s.networkStop(hostNetwork, sb) + podInfraStatus := s.Runtime().ContainerStatus(podInfraContainer) + if podInfraStatus.Status != oci.ContainerStateStopped { + timeout := int64(10) + if err := s.Runtime().StopContainer(ctx, podInfraContainer, timeout); err != nil { + return nil, fmt.Errorf("failed to stop infra container %s in pod sandbox %s: %v", podInfraContainer.Name(), sb.ID(), err) + } + if err := s.Runtime().WaitContainerStateStopped(ctx, podInfraContainer, timeout); err != nil { + return nil, fmt.Errorf("failed to get infra container 'stopped' status %s in pod sandbox %s: %v", podInfraContainer.Name(), sb.ID(), err) + } + } + if err := sb.NetNsRemove(); err != nil { + return nil, err + } + + if err := label.ReleaseLabel(sb.ProcessLabel()); err != nil { + return nil, err + } + + // unmount the shm for the pod + if sb.ShmPath() != "/dev/shm" { + // we got namespaces in the form of + // /var/run/containers/storage/overlay-containers/CID/userdata/shm + // but /var/run on most systems is symlinked to /run so we first resolve + // the symlink and then try and see if it's mounted + fp, err := symlink.FollowSymlinkInScope(sb.ShmPath(), "/") + if err != nil { + return nil, err + } + if mounted, err := mount.Mounted(fp); err == nil && mounted { + if err := unix.Unmount(fp, unix.MNT_DETACH); err != nil { + return nil, err + } + } + } + + if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { + logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err) + } + + sb.SetStopped() + resp = &pb.StopPodSandboxResponse{} + logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) + return resp, nil +} diff --git a/server/sandbox_stop_unsupported.go b/server/sandbox_stop_unsupported.go new file mode 100644 index 00000000..d9dc372e --- /dev/null +++ b/server/sandbox_stop_unsupported.go @@ -0,0 +1,14 @@ +// +build !linux + +package server +
+import ( + "fmt" + + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" +) + +func (s *Server) stopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (resp *pb.StopPodSandboxResponse, err error) { + return nil, fmt.Errorf("unsupported") +}