96fb47213e
We had a bug in ImageStatus where we weren't returning the default image user if set, thus running all containers as root despite a user being set in the image config. We weren't populating the Username field of ImageStatus. This patch fixes that along with the handling of multiple images based on the registry patch for multiple images. It also fixes ListImages to return Username as well. Signed-off-by: Antonio Murdaca <runcom@redhat.com>
1438 lines
42 KiB
Go
1438 lines
42 KiB
Go
package server
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
dockermounts "github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/stringid"
|
|
"github.com/docker/docker/pkg/symlink"
|
|
"github.com/kubernetes-incubator/cri-o/lib"
|
|
"github.com/kubernetes-incubator/cri-o/lib/sandbox"
|
|
"github.com/kubernetes-incubator/cri-o/oci"
|
|
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
|
|
"github.com/kubernetes-incubator/cri-o/pkg/storage"
|
|
"github.com/kubernetes-incubator/cri-o/server/apparmor"
|
|
"github.com/kubernetes-incubator/cri-o/server/seccomp"
|
|
"github.com/opencontainers/image-spec/specs-go/v1"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
"github.com/opencontainers/runc/libcontainer/devices"
|
|
"github.com/opencontainers/runc/libcontainer/user"
|
|
rspec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/opencontainers/runtime-tools/generate"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/net/context"
|
|
"golang.org/x/sys/unix"
|
|
pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
|
)
|
|
|
|
// Seccomp profile name constants.
const (
	seccompUnconfined      = "unconfined"
	seccompRuntimeDefault  = "runtime/default"
	seccompDockerDefault   = "docker/default"
	seccompLocalhostPrefix = "localhost/"
)

// Cgroup naming defaults used when building a container's cgroup path.
const (
	// scopePrefix is prepended to the container ID in the cgroup path.
	scopePrefix = "crio"
	// defaultCgroupfsParent is the parent cgroup when the cgroupfs manager is used.
	defaultCgroupfsParent = "/crio"
	// defaultSystemdParent is the parent slice when the systemd manager is used.
	defaultSystemdParent = "system.slice"
)
|
|
|
|
type orderedMounts []rspec.Mount
|
|
|
|
// Len returns the number of mounts. Used in sorting.
|
|
func (m orderedMounts) Len() int {
|
|
return len(m)
|
|
}
|
|
|
|
// Less returns true if the number of parts (a/b/c would be 3 parts) in the
|
|
// mount indexed by parameter 1 is less than that of the mount indexed by
|
|
// parameter 2. Used in sorting.
|
|
func (m orderedMounts) Less(i, j int) bool {
|
|
return m.parts(i) < m.parts(j)
|
|
}
|
|
|
|
// Swap swaps two items in an array of mounts. Used in sorting
|
|
func (m orderedMounts) Swap(i, j int) {
|
|
m[i], m[j] = m[j], m[i]
|
|
}
|
|
|
|
// parts returns the number of parts in the destination of a mount. Used in sorting.
|
|
func (m orderedMounts) parts(i int) int {
|
|
return strings.Count(filepath.Clean(m[i].Destination), string(os.PathSeparator))
|
|
}
|
|
|
|
func addOCIBindMounts(mountLabel string, containerConfig *pb.ContainerConfig, specgen *generate.Generator) ([]oci.ContainerVolume, []rspec.Mount, error) {
|
|
volumes := []oci.ContainerVolume{}
|
|
ociMounts := []rspec.Mount{}
|
|
mounts := containerConfig.GetMounts()
|
|
for _, mount := range mounts {
|
|
dest := mount.ContainerPath
|
|
if dest == "" {
|
|
return nil, nil, fmt.Errorf("Mount.ContainerPath is empty")
|
|
}
|
|
|
|
src := mount.HostPath
|
|
if src == "" {
|
|
return nil, nil, fmt.Errorf("Mount.HostPath is empty")
|
|
}
|
|
|
|
if _, err := os.Stat(src); err != nil && os.IsNotExist(err) {
|
|
if err1 := os.MkdirAll(src, 0644); err1 != nil {
|
|
return nil, nil, fmt.Errorf("Failed to mkdir %s: %s", src, err)
|
|
}
|
|
}
|
|
|
|
src, err := resolveSymbolicLink(src)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("failed to resolve symlink %q: %v", src, err)
|
|
}
|
|
|
|
options := []string{"rw"}
|
|
if mount.Readonly {
|
|
options = []string{"ro"}
|
|
}
|
|
options = append(options, "rbind")
|
|
|
|
// mount propagation
|
|
mountInfos, err := dockermounts.GetMounts()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
switch mount.GetPropagation() {
|
|
case pb.MountPropagation_PROPAGATION_PRIVATE:
|
|
options = append(options, "rprivate")
|
|
// Since default root propagation in runc is rprivate ignore
|
|
// setting the root propagation
|
|
case pb.MountPropagation_PROPAGATION_BIDIRECTIONAL:
|
|
if err := ensureShared(src, mountInfos); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
options = append(options, "rshared")
|
|
specgen.SetLinuxRootPropagation("rshared")
|
|
case pb.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
|
|
if err := ensureSharedOrSlave(src, mountInfos); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
options = append(options, "rslave")
|
|
if specgen.Spec().Linux.RootfsPropagation != "rshared" &&
|
|
specgen.Spec().Linux.RootfsPropagation != "rslave" {
|
|
specgen.SetLinuxRootPropagation("rslave")
|
|
}
|
|
default:
|
|
logrus.Warnf("Unknown propagation mode for hostPath %q", mount.HostPath)
|
|
options = append(options, "rprivate")
|
|
}
|
|
|
|
if mount.SelinuxRelabel {
|
|
// Need a way in kubernetes to determine if the volume is shared or private
|
|
if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP {
|
|
return nil, nil, fmt.Errorf("relabel failed %s: %v", src, err)
|
|
}
|
|
}
|
|
|
|
volumes = append(volumes, oci.ContainerVolume{
|
|
ContainerPath: dest,
|
|
HostPath: src,
|
|
Readonly: mount.Readonly,
|
|
})
|
|
|
|
ociMounts = append(ociMounts, rspec.Mount{
|
|
Source: src,
|
|
Destination: dest,
|
|
Options: options,
|
|
})
|
|
}
|
|
|
|
return volumes, ociMounts, nil
|
|
}
|
|
|
|
// Ensure mount point on which path is mounted, is shared.
|
|
func ensureShared(path string, mountInfos []*dockermounts.Info) error {
|
|
sourceMount, optionalOpts, err := getSourceMount(path, mountInfos)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Make sure source mount point is shared.
|
|
optsSplit := strings.Split(optionalOpts, " ")
|
|
for _, opt := range optsSplit {
|
|
if strings.HasPrefix(opt, "shared:") {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return fmt.Errorf("path %q is mounted on %q but it is not a shared mount", path, sourceMount)
|
|
}
|
|
|
|
// Ensure mount point on which path is mounted, is either shared or slave.
|
|
func ensureSharedOrSlave(path string, mountInfos []*dockermounts.Info) error {
|
|
sourceMount, optionalOpts, err := getSourceMount(path, mountInfos)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Make sure source mount point is shared.
|
|
optsSplit := strings.Split(optionalOpts, " ")
|
|
for _, opt := range optsSplit {
|
|
if strings.HasPrefix(opt, "shared:") {
|
|
return nil
|
|
} else if strings.HasPrefix(opt, "master:") {
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, sourceMount)
|
|
}
|
|
|
|
func getMountInfo(mountInfos []*dockermounts.Info, dir string) *dockermounts.Info {
|
|
for _, m := range mountInfos {
|
|
if m.Mountpoint == dir {
|
|
return m
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getSourceMount(source string, mountInfos []*dockermounts.Info) (string, string, error) {
|
|
mountinfo := getMountInfo(mountInfos, source)
|
|
if mountinfo != nil {
|
|
return source, mountinfo.Optional, nil
|
|
}
|
|
|
|
path := source
|
|
for {
|
|
path = filepath.Dir(path)
|
|
mountinfo = getMountInfo(mountInfos, path)
|
|
if mountinfo != nil {
|
|
return path, mountinfo.Optional, nil
|
|
}
|
|
|
|
if path == "/" {
|
|
break
|
|
}
|
|
}
|
|
|
|
// If we are here, we did not find parent mount. Something is wrong.
|
|
return "", "", fmt.Errorf("Could not find source mount of %s", source)
|
|
}
|
|
|
|
func addImageVolumes(rootfs string, s *Server, containerInfo *storage.ContainerInfo, specgen *generate.Generator, mountLabel string) ([]rspec.Mount, error) {
|
|
mounts := []rspec.Mount{}
|
|
for dest := range containerInfo.Config.Config.Volumes {
|
|
fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, dest), rootfs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
switch s.config.ImageVolumes {
|
|
case lib.ImageVolumesMkdir:
|
|
if err1 := os.MkdirAll(fp, 0644); err1 != nil {
|
|
return nil, err1
|
|
}
|
|
case lib.ImageVolumesBind:
|
|
volumeDirName := stringid.GenerateNonCryptoID()
|
|
src := filepath.Join(containerInfo.RunDir, "mounts", volumeDirName)
|
|
if err1 := os.MkdirAll(src, 0644); err1 != nil {
|
|
return nil, err1
|
|
}
|
|
// Label the source with the sandbox selinux mount label
|
|
if mountLabel != "" {
|
|
if err1 := label.Relabel(src, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP {
|
|
return nil, fmt.Errorf("relabel failed %s: %v", src, err1)
|
|
}
|
|
}
|
|
|
|
logrus.Debugf("Adding bind mounted volume: %s to %s", src, dest)
|
|
mounts = append(mounts, rspec.Mount{
|
|
Source: src,
|
|
Destination: dest,
|
|
Options: []string{"rw"},
|
|
})
|
|
|
|
case lib.ImageVolumesIgnore:
|
|
logrus.Debugf("Ignoring volume %v", dest)
|
|
default:
|
|
logrus.Fatalf("Unrecognized image volumes setting")
|
|
}
|
|
}
|
|
return mounts, nil
|
|
}
|
|
|
|
// resolveSymbolicLink resolves a possible symlink path. When path is a
// symbolic link the fully evaluated target is returned; otherwise path is
// returned unchanged. An error is returned when path cannot be inspected or
// the link cannot be evaluated.
func resolveSymbolicLink(path string) (string, error) {
	fi, err := os.Lstat(path)
	if err != nil {
		return "", err
	}
	if isLink := fi.Mode()&os.ModeSymlink != 0; isLink {
		return filepath.EvalSymlinks(path)
	}
	return path, nil
}
|
|
|
|
func addDevices(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error {
|
|
sp := specgen.Spec()
|
|
if containerConfig.GetLinux().GetSecurityContext().GetPrivileged() {
|
|
hostDevices, err := devices.HostDevices()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, hostDevice := range hostDevices {
|
|
rd := rspec.LinuxDevice{
|
|
Path: hostDevice.Path,
|
|
Type: string(hostDevice.Type),
|
|
Major: hostDevice.Major,
|
|
Minor: hostDevice.Minor,
|
|
UID: &hostDevice.Uid,
|
|
GID: &hostDevice.Gid,
|
|
}
|
|
if hostDevice.Major == 0 && hostDevice.Minor == 0 {
|
|
// Invalid device, most likely a symbolic link, skip it.
|
|
continue
|
|
}
|
|
specgen.AddDevice(rd)
|
|
}
|
|
sp.Linux.Resources.Devices = []rspec.LinuxDeviceCgroup{
|
|
{
|
|
Allow: true,
|
|
Access: "rwm",
|
|
},
|
|
}
|
|
return nil
|
|
}
|
|
for _, device := range containerConfig.GetDevices() {
|
|
path, err := resolveSymbolicLink(device.HostPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dev, err := devices.DeviceFromPath(path, device.Permissions)
|
|
// if there was no error, return the device
|
|
if err == nil {
|
|
rd := rspec.LinuxDevice{
|
|
Path: device.ContainerPath,
|
|
Type: string(dev.Type),
|
|
Major: dev.Major,
|
|
Minor: dev.Minor,
|
|
UID: &dev.Uid,
|
|
GID: &dev.Gid,
|
|
}
|
|
specgen.AddDevice(rd)
|
|
sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{
|
|
Allow: true,
|
|
Type: string(dev.Type),
|
|
Major: &dev.Major,
|
|
Minor: &dev.Minor,
|
|
Access: dev.Permissions,
|
|
})
|
|
continue
|
|
}
|
|
// if the device is not a device node
|
|
// try to see if it's a directory holding many devices
|
|
if err == devices.ErrNotADevice {
|
|
|
|
// check if it is a directory
|
|
if src, e := os.Stat(path); e == nil && src.IsDir() {
|
|
|
|
// mount the internal devices recursively
|
|
filepath.Walk(path, func(dpath string, f os.FileInfo, e error) error {
|
|
childDevice, e := devices.DeviceFromPath(dpath, device.Permissions)
|
|
if e != nil {
|
|
// ignore the device
|
|
return nil
|
|
}
|
|
cPath := strings.Replace(dpath, path, device.ContainerPath, 1)
|
|
rd := rspec.LinuxDevice{
|
|
Path: cPath,
|
|
Type: string(childDevice.Type),
|
|
Major: childDevice.Major,
|
|
Minor: childDevice.Minor,
|
|
UID: &childDevice.Uid,
|
|
GID: &childDevice.Gid,
|
|
}
|
|
specgen.AddDevice(rd)
|
|
sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{
|
|
Allow: true,
|
|
Type: string(childDevice.Type),
|
|
Major: &childDevice.Major,
|
|
Minor: &childDevice.Minor,
|
|
Access: childDevice.Permissions,
|
|
})
|
|
|
|
return nil
|
|
})
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// buildOCIProcessArgs build an OCI compatible process arguments slice.
|
|
func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, imageOCIConfig *v1.Image) ([]string, error) {
|
|
//# Start the nginx container using the default command, but use custom
|
|
//arguments (arg1 .. argN) for that command.
|
|
//kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN>
|
|
|
|
//# Start the nginx container using a different command and custom arguments.
|
|
//kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN>
|
|
|
|
kubeCommands := containerKubeConfig.Command
|
|
kubeArgs := containerKubeConfig.Args
|
|
|
|
// merge image config and kube config
|
|
// same as docker does today...
|
|
if imageOCIConfig != nil {
|
|
if len(kubeCommands) == 0 {
|
|
if len(kubeArgs) == 0 {
|
|
kubeArgs = imageOCIConfig.Config.Cmd
|
|
}
|
|
if kubeCommands == nil {
|
|
kubeCommands = imageOCIConfig.Config.Entrypoint
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(kubeCommands) == 0 && len(kubeArgs) == 0 {
|
|
return nil, fmt.Errorf("no command specified")
|
|
}
|
|
|
|
// create entrypoint and args
|
|
var entrypoint string
|
|
var args []string
|
|
if len(kubeCommands) != 0 {
|
|
entrypoint = kubeCommands[0]
|
|
args = append(kubeCommands[1:], kubeArgs...)
|
|
} else {
|
|
entrypoint = kubeArgs[0]
|
|
args = kubeArgs[1:]
|
|
}
|
|
|
|
processArgs := append([]string{entrypoint}, args...)
|
|
|
|
logrus.Debugf("OCI process args %v", processArgs)
|
|
|
|
return processArgs, nil
|
|
}
|
|
|
|
// addOCIHook look for hooks programs installed in hooksDirPath and add them to spec
|
|
func addOCIHook(specgen *generate.Generator, hook lib.HookParams) error {
|
|
logrus.Debugf("AddOCIHook", hook)
|
|
for _, stage := range hook.Stage {
|
|
h := rspec.Hook{
|
|
Path: hook.Hook,
|
|
Args: append([]string{hook.Hook}, hook.Arguments...),
|
|
Env: []string{fmt.Sprintf("stage=%s", stage)},
|
|
}
|
|
switch stage {
|
|
case "prestart":
|
|
specgen.AddPreStartHook(h)
|
|
case "poststart":
|
|
specgen.AddPostStartHook(h)
|
|
case "poststop":
|
|
specgen.AddPostStopHook(h)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config
|
|
func setupContainerUser(specgen *generate.Generator, rootfs string, sc *pb.LinuxContainerSecurityContext, imageConfig *v1.Image) error {
|
|
if sc != nil {
|
|
containerUser := ""
|
|
// Case 1: run as user is set by kubelet
|
|
if sc.GetRunAsUser() != nil {
|
|
containerUser = strconv.FormatInt(sc.GetRunAsUser().GetValue(), 10)
|
|
} else {
|
|
// Case 2: run as username is set by kubelet
|
|
userName := sc.GetRunAsUsername()
|
|
if userName != "" {
|
|
containerUser = userName
|
|
} else {
|
|
// Case 3: get user from image config
|
|
if imageConfig != nil {
|
|
imageUser := imageConfig.Config.User
|
|
if imageUser != "" {
|
|
containerUser = imageUser
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
logrus.Debugf("CONTAINER USER: %+v", containerUser)
|
|
|
|
// Add uid, gid and groups from user
|
|
uid, gid, addGroups, err1 := getUserInfo(rootfs, containerUser)
|
|
if err1 != nil {
|
|
return err1
|
|
}
|
|
|
|
logrus.Debugf("UID: %v, GID: %v, Groups: %+v", uid, gid, addGroups)
|
|
specgen.SetProcessUID(uid)
|
|
specgen.SetProcessGID(gid)
|
|
for _, group := range addGroups {
|
|
specgen.AddProcessAdditionalGid(group)
|
|
}
|
|
|
|
// Add groups from CRI
|
|
groups := sc.GetSupplementalGroups()
|
|
for _, group := range groups {
|
|
specgen.AddProcessAdditionalGid(uint32(group))
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// setupCapabilities sets process.capabilities in the OCI runtime config.
|
|
func setupCapabilities(specgen *generate.Generator, capabilities *pb.Capability) error {
|
|
if capabilities == nil {
|
|
return nil
|
|
}
|
|
|
|
toCAPPrefixed := func(cap string) string {
|
|
if !strings.HasPrefix(strings.ToLower(cap), "cap_") {
|
|
return "CAP_" + strings.ToUpper(cap)
|
|
}
|
|
return cap
|
|
}
|
|
|
|
// Add/drop all capabilities if "all" is specified, so that
|
|
// following individual add/drop could still work. E.g.
|
|
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
|
|
// will be all capabilities without `CAP_CHOWN`.
|
|
// see https://github.com/kubernetes/kubernetes/issues/51980
|
|
if inStringSlice(capabilities.GetAddCapabilities(), "ALL") {
|
|
for _, c := range getOCICapabilitiesList() {
|
|
if err := specgen.AddProcessCapabilityAmbient(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityBounding(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityEffective(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityInheritable(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityPermitted(c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
if inStringSlice(capabilities.GetDropCapabilities(), "ALL") {
|
|
for _, c := range getOCICapabilitiesList() {
|
|
if err := specgen.DropProcessCapabilityAmbient(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.DropProcessCapabilityBounding(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.DropProcessCapabilityEffective(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.DropProcessCapabilityInheritable(c); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.DropProcessCapabilityPermitted(c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, cap := range capabilities.GetAddCapabilities() {
|
|
if strings.ToUpper(cap) == "ALL" {
|
|
continue
|
|
}
|
|
capPrefixed := toCAPPrefixed(cap)
|
|
if err := specgen.AddProcessCapabilityAmbient(capPrefixed); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityBounding(capPrefixed); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityEffective(capPrefixed); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityInheritable(capPrefixed); err != nil {
|
|
return err
|
|
}
|
|
if err := specgen.AddProcessCapabilityPermitted(capPrefixed); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
for _, cap := range capabilities.GetDropCapabilities() {
|
|
if strings.ToUpper(cap) == "ALL" {
|
|
continue
|
|
}
|
|
capPrefixed := toCAPPrefixed(cap)
|
|
if err := specgen.DropProcessCapabilityAmbient(capPrefixed); err != nil {
|
|
return fmt.Errorf("failed to drop cap %s %v", capPrefixed, err)
|
|
}
|
|
if err := specgen.DropProcessCapabilityBounding(capPrefixed); err != nil {
|
|
return fmt.Errorf("failed to drop cap %s %v", capPrefixed, err)
|
|
}
|
|
if err := specgen.DropProcessCapabilityEffective(capPrefixed); err != nil {
|
|
return fmt.Errorf("failed to drop cap %s %v", capPrefixed, err)
|
|
}
|
|
if err := specgen.DropProcessCapabilityInheritable(capPrefixed); err != nil {
|
|
return fmt.Errorf("failed to drop cap %s %v", capPrefixed, err)
|
|
}
|
|
if err := specgen.DropProcessCapabilityPermitted(capPrefixed); err != nil {
|
|
return fmt.Errorf("failed to drop cap %s %v", capPrefixed, err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func hostNetwork(containerConfig *pb.ContainerConfig) bool {
|
|
securityContext := containerConfig.GetLinux().GetSecurityContext()
|
|
if securityContext == nil || securityContext.GetNamespaceOptions() == nil {
|
|
return false
|
|
}
|
|
|
|
return securityContext.GetNamespaceOptions().HostNetwork
|
|
}
|
|
|
|
// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
// logPath to be a broken symlink to some magical Docker path. Ideally we
// wouldn't have to deal with this, but until that issue is fixed we have to
// remove the path if it's a broken symlink.
//
// Paths that do not exist, are not symlinks, or are symlinks whose target
// exists are left alone and nil is returned. An error is returned only when
// removing a broken symlink fails.
func ensureSaneLogPath(logPath string) error {
	info, lstatErr := os.Lstat(logPath)
	if lstatErr != nil {
		// Non-existent (or uninspectable) files aren't our problem.
		return nil
	}
	if info.Mode()&os.ModeSymlink == 0 {
		// Neither are non-symlinks.
		return nil
	}

	// The link itself exists; it is broken only if following it finds nothing.
	if _, statErr := os.Stat(logPath); !os.IsNotExist(statErr) {
		return nil
	}
	if rmErr := os.RemoveAll(logPath); rmErr != nil {
		return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", rmErr)
	}
	return nil
}
|
|
|
|
// addSecretsBindMounts mounts user defined secrets to the container
|
|
func addSecretsBindMounts(mountLabel, ctrRunDir string, defaultMounts []string, specgen generate.Generator) ([]rspec.Mount, error) {
|
|
containerMounts := specgen.Spec().Mounts
|
|
mounts, err := secretMounts(defaultMounts, mountLabel, ctrRunDir, containerMounts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return mounts, nil
|
|
}
|
|
|
|
// CreateContainer creates a new container in specified PodSandbox
|
|
func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
|
|
const operation = "create_container"
|
|
defer func() {
|
|
recordOperation(operation, time.Now())
|
|
recordError(operation, err)
|
|
}()
|
|
logrus.Debugf("CreateContainerRequest %+v", req)
|
|
|
|
s.updateLock.RLock()
|
|
defer s.updateLock.RUnlock()
|
|
|
|
sbID := req.PodSandboxId
|
|
if sbID == "" {
|
|
return nil, fmt.Errorf("PodSandboxId should not be empty")
|
|
}
|
|
|
|
sandboxID, err := s.PodIDIndex().Get(sbID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err)
|
|
}
|
|
|
|
sb := s.getSandbox(sandboxID)
|
|
if sb == nil {
|
|
return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID)
|
|
}
|
|
|
|
// The config of the container
|
|
containerConfig := req.GetConfig()
|
|
if containerConfig == nil {
|
|
return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig is nil")
|
|
}
|
|
|
|
if containerConfig.GetMetadata() == nil {
|
|
return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Metadata is nil")
|
|
}
|
|
|
|
name := containerConfig.GetMetadata().GetName()
|
|
if name == "" {
|
|
return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Name is empty")
|
|
}
|
|
|
|
containerID, containerName, err := s.generateContainerIDandName(sb.Metadata(), containerConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer func() {
|
|
if err != nil {
|
|
s.ReleaseContainerName(containerName)
|
|
}
|
|
}()
|
|
|
|
container, err := s.createSandboxContainer(ctx, containerID, containerName, sb, req.GetSandboxConfig(), containerConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
err2 := s.StorageRuntimeServer().DeleteContainer(containerID)
|
|
if err2 != nil {
|
|
logrus.Warnf("Failed to cleanup container directory: %v", err2)
|
|
}
|
|
}
|
|
}()
|
|
|
|
if err = s.Runtime().CreateContainer(container, sb.CgroupParent()); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.addContainer(container)
|
|
|
|
if err = s.CtrIDIndex().Add(containerID); err != nil {
|
|
s.removeContainer(container)
|
|
return nil, err
|
|
}
|
|
|
|
s.ContainerStateToDisk(container)
|
|
|
|
resp := &pb.CreateContainerResponse{
|
|
ContainerId: containerID,
|
|
}
|
|
|
|
logrus.Debugf("CreateContainerResponse: %+v", resp)
|
|
return resp, nil
|
|
}
|
|
|
|
func (s *Server) setupOCIHooks(specgen *generate.Generator, sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, command string) error {
|
|
mounts := containerConfig.GetMounts()
|
|
addedHooks := map[string]struct{}{}
|
|
addHook := func(hook lib.HookParams) error {
|
|
// Only add a hook once
|
|
if _, ok := addedHooks[hook.Hook]; !ok {
|
|
if err := addOCIHook(specgen, hook); err != nil {
|
|
return err
|
|
}
|
|
addedHooks[hook.Hook] = struct{}{}
|
|
}
|
|
return nil
|
|
}
|
|
for _, hook := range s.Hooks() {
|
|
logrus.Debugf("SetupOCIHooks", hook)
|
|
if hook.HasBindMounts && len(mounts) > 0 {
|
|
if err := addHook(hook); err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
for _, cmd := range hook.Cmds {
|
|
match, err := regexp.MatchString(cmd, command)
|
|
if err != nil {
|
|
logrus.Errorf("Invalid regex %q:%q", cmd, err)
|
|
continue
|
|
}
|
|
if match {
|
|
if err := addHook(hook); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
for _, annotationRegex := range hook.Annotations {
|
|
for _, annotation := range containerConfig.GetAnnotations() {
|
|
match, err := regexp.MatchString(annotationRegex, annotation)
|
|
if err != nil {
|
|
logrus.Errorf("Invalid regex %q:%q", annotationRegex, err)
|
|
continue
|
|
}
|
|
if match {
|
|
if err := addHook(hook); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
for _, annotation := range sb.Annotations() {
|
|
match, err := regexp.MatchString(annotationRegex, annotation)
|
|
if err != nil {
|
|
logrus.Errorf("Invalid regex %q:%q", annotationRegex, err)
|
|
continue
|
|
}
|
|
if match {
|
|
if err := addHook(hook); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
func (s *Server) createSandboxContainer(ctx context.Context, containerID string, containerName string, sb *sandbox.Sandbox, sandboxConfig *pb.PodSandboxConfig, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
|
|
if sb == nil {
|
|
return nil, errors.New("createSandboxContainer needs a sandbox")
|
|
}
|
|
|
|
// TODO: simplify this function (cyclomatic complexity here is high)
|
|
// TODO: factor generating/updating the spec into something other projects can vendor
|
|
|
|
// creates a spec Generator with the default spec.
|
|
specgen := generate.New()
|
|
specgen.HostSpecific = true
|
|
specgen.ClearProcessRlimits()
|
|
|
|
var readOnlyRootfs bool
|
|
var privileged bool
|
|
if containerConfig.GetLinux().GetSecurityContext() != nil {
|
|
if containerConfig.GetLinux().GetSecurityContext().Privileged {
|
|
privileged = true
|
|
}
|
|
|
|
if containerConfig.GetLinux().GetSecurityContext().ReadonlyRootfs {
|
|
readOnlyRootfs = true
|
|
specgen.SetRootReadonly(true)
|
|
}
|
|
}
|
|
|
|
mountLabel := sb.MountLabel()
|
|
processLabel := sb.ProcessLabel()
|
|
selinuxConfig := containerConfig.GetLinux().GetSecurityContext().GetSelinuxOptions()
|
|
if selinuxConfig != nil {
|
|
var err error
|
|
processLabel, mountLabel, err = getSELinuxLabels(selinuxConfig, privileged)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
containerVolumes, ociMounts, err := addOCIBindMounts(mountLabel, containerConfig, &specgen)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
volumesJSON, err := json.Marshal(containerVolumes)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
specgen.AddAnnotation(annotations.Volumes, string(volumesJSON))
|
|
|
|
mnt := rspec.Mount{
|
|
Destination: "/sys/fs/cgroup",
|
|
Type: "cgroup",
|
|
Source: "cgroup",
|
|
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
|
}
|
|
// Add cgroup mount so container process can introspect its own limits
|
|
specgen.AddMount(mnt)
|
|
|
|
if err := addDevices(sb, containerConfig, &specgen); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
labels := containerConfig.GetLabels()
|
|
|
|
if err := validateLabels(labels); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
metadata := containerConfig.GetMetadata()
|
|
|
|
kubeAnnotations := containerConfig.GetAnnotations()
|
|
if kubeAnnotations != nil {
|
|
for k, v := range kubeAnnotations {
|
|
specgen.AddAnnotation(k, v)
|
|
}
|
|
}
|
|
if labels != nil {
|
|
for k, v := range labels {
|
|
specgen.AddAnnotation(k, v)
|
|
}
|
|
}
|
|
|
|
// set this container's apparmor profile if it is set by sandbox
|
|
if s.appArmorEnabled && !privileged {
|
|
|
|
appArmorProfileName := s.getAppArmorProfileName(containerConfig.GetLinux().GetSecurityContext().GetApparmorProfile())
|
|
if appArmorProfileName != "" {
|
|
// reload default apparmor profile if it is unloaded.
|
|
if s.appArmorProfile == apparmor.DefaultApparmorProfile {
|
|
if err := apparmor.EnsureDefaultApparmorProfile(); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
specgen.SetProcessApparmorProfile(appArmorProfileName)
|
|
}
|
|
|
|
}
|
|
|
|
logPath := containerConfig.GetLogPath()
|
|
sboxLogDir := sandboxConfig.GetLogDirectory()
|
|
if sboxLogDir == "" {
|
|
sboxLogDir = sb.LogDir()
|
|
}
|
|
if logPath == "" {
|
|
logPath = filepath.Join(sboxLogDir, containerID+".log")
|
|
}
|
|
if !filepath.IsAbs(logPath) {
|
|
// XXX: It's not really clear what this should be versus the sbox logDirectory.
|
|
logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath)
|
|
logPath = filepath.Join(sboxLogDir, logPath)
|
|
logrus.Warnf("logPath from relative path is now absolute: %s", logPath)
|
|
}
|
|
|
|
// Handle https://issues.k8s.io/44043
|
|
if err := ensureSaneLogPath(logPath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
logrus.WithFields(logrus.Fields{
|
|
"sbox.logdir": sboxLogDir,
|
|
"ctr.logfile": containerConfig.GetLogPath(),
|
|
"log_path": logPath,
|
|
}).Debugf("setting container's log_path")
|
|
|
|
specgen.SetProcessTerminal(containerConfig.Tty)
|
|
if containerConfig.Tty {
|
|
specgen.AddProcessEnv("TERM", "xterm")
|
|
}
|
|
|
|
linux := containerConfig.GetLinux()
|
|
if linux != nil {
|
|
resources := linux.GetResources()
|
|
if resources != nil {
|
|
specgen.SetLinuxResourcesCPUPeriod(uint64(resources.GetCpuPeriod()))
|
|
specgen.SetLinuxResourcesCPUQuota(resources.GetCpuQuota())
|
|
specgen.SetLinuxResourcesCPUShares(uint64(resources.GetCpuShares()))
|
|
specgen.SetLinuxResourcesMemoryLimit(resources.GetMemoryLimitInBytes())
|
|
specgen.SetProcessOOMScoreAdj(int(resources.GetOomScoreAdj()))
|
|
specgen.SetLinuxResourcesCPUCpus(resources.GetCpusetCpus())
|
|
specgen.SetLinuxResourcesCPUMems(resources.GetCpusetMems())
|
|
}
|
|
|
|
var cgPath string
|
|
parent := defaultCgroupfsParent
|
|
useSystemd := s.config.CgroupManager == oci.SystemdCgroupsManager
|
|
if useSystemd {
|
|
parent = defaultSystemdParent
|
|
}
|
|
if sb.CgroupParent() != "" {
|
|
parent = sb.CgroupParent()
|
|
}
|
|
if useSystemd {
|
|
cgPath = parent + ":" + scopePrefix + ":" + containerID
|
|
} else {
|
|
cgPath = filepath.Join(parent, scopePrefix+"-"+containerID)
|
|
}
|
|
specgen.SetLinuxCgroupsPath(cgPath)
|
|
|
|
if privileged {
|
|
specgen.SetupPrivileged(true)
|
|
setOCIBindMountsPrivileged(&specgen)
|
|
} else {
|
|
err = setupCapabilities(&specgen, linux.GetSecurityContext().GetCapabilities())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
specgen.SetProcessSelinuxLabel(processLabel)
|
|
specgen.SetLinuxMountLabel(mountLabel)
|
|
specgen.SetProcessNoNewPrivileges(linux.GetSecurityContext().GetNoNewPrivs())
|
|
|
|
if containerConfig.GetLinux().GetSecurityContext() != nil &&
|
|
!containerConfig.GetLinux().GetSecurityContext().Privileged {
|
|
for _, mp := range []string{
|
|
"/proc/kcore",
|
|
"/proc/latency_stats",
|
|
"/proc/timer_list",
|
|
"/proc/timer_stats",
|
|
"/proc/sched_debug",
|
|
"/proc/scsi",
|
|
"/sys/firmware",
|
|
} {
|
|
specgen.AddLinuxMaskedPaths(mp)
|
|
}
|
|
|
|
for _, rp := range []string{
|
|
"/proc/asound",
|
|
"/proc/bus",
|
|
"/proc/fs",
|
|
"/proc/irq",
|
|
"/proc/sys",
|
|
"/proc/sysrq-trigger",
|
|
} {
|
|
specgen.AddLinuxReadonlyPaths(rp)
|
|
}
|
|
}
|
|
}
|
|
// Join the namespace paths for the pod sandbox container.
|
|
podInfraState := s.Runtime().ContainerStatus(sb.InfraContainer())
|
|
|
|
logrus.Debugf("pod container state %+v", podInfraState)
|
|
|
|
ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
|
|
if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.IPCNamespace), ipcNsPath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
utsNsPath := fmt.Sprintf("/proc/%d/ns/uts", podInfraState.Pid)
|
|
if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.UTSNamespace), utsNsPath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
|
|
// kubernetes PodSpec specify to use Host PID namespace
|
|
specgen.RemoveLinuxNamespace(string(rspec.PIDNamespace))
|
|
} else if s.config.EnableSharedPIDNamespace {
|
|
// share Pod PID namespace
|
|
pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", podInfraState.Pid)
|
|
if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
netNsPath := sb.NetNsPath()
|
|
if netNsPath == "" {
|
|
// The sandbox does not have a permanent namespace,
|
|
// it's on the host one.
|
|
netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
|
|
}
|
|
|
|
if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.NetworkNamespace), netNsPath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
imageSpec := containerConfig.GetImage()
|
|
if imageSpec == nil {
|
|
return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image is nil")
|
|
}
|
|
|
|
image := imageSpec.Image
|
|
if image == "" {
|
|
return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image.Image is empty")
|
|
}
|
|
images, err := s.StorageImageServer().ResolveNames(image)
|
|
if err != nil {
|
|
if err == storage.ErrCannotParseImageID {
|
|
images = append(images, image)
|
|
} else {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// Get imageName and imageRef that are later requested in container status
|
|
status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), images[0])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
imageName := status.Name
|
|
imageRef := status.ID
|
|
if len(status.RepoDigests) > 0 {
|
|
imageRef = status.RepoDigests[0]
|
|
}
|
|
|
|
specgen.AddAnnotation(annotations.Image, image)
|
|
specgen.AddAnnotation(annotations.ImageName, imageName)
|
|
specgen.AddAnnotation(annotations.ImageRef, imageRef)
|
|
specgen.AddAnnotation(annotations.IP, sb.IP())
|
|
|
|
mnt = rspec.Mount{
|
|
Type: "bind",
|
|
Source: sb.ShmPath(),
|
|
Destination: "/etc/shm",
|
|
Options: []string{"rw", "bind"},
|
|
}
|
|
// bind mount the pod shm
|
|
specgen.AddMount(mnt)
|
|
|
|
options := []string{"rw"}
|
|
if readOnlyRootfs {
|
|
options = []string{"ro"}
|
|
}
|
|
if sb.ResolvPath() != "" {
|
|
if err := label.Relabel(sb.ResolvPath(), mountLabel, true); err != nil && err != unix.ENOTSUP {
|
|
return nil, err
|
|
}
|
|
|
|
mnt = rspec.Mount{
|
|
Type: "bind",
|
|
Source: sb.ResolvPath(),
|
|
Destination: "/etc/resolv.conf",
|
|
Options: append(options, "bind"),
|
|
}
|
|
// bind mount the pod resolver file
|
|
specgen.AddMount(mnt)
|
|
}
|
|
|
|
if sb.HostnamePath() != "" {
|
|
if err := label.Relabel(sb.HostnamePath(), mountLabel, true); err != nil && err != unix.ENOTSUP {
|
|
return nil, err
|
|
}
|
|
|
|
mnt = rspec.Mount{
|
|
Type: "bind",
|
|
Source: sb.HostnamePath(),
|
|
Destination: "/etc/hostname",
|
|
Options: append(options, "bind"),
|
|
}
|
|
specgen.AddMount(mnt)
|
|
}
|
|
|
|
isInCRIMounts := func(dst string, mounts []*pb.Mount) bool {
|
|
for _, m := range mounts {
|
|
if m.ContainerPath == dst {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
if !isInCRIMounts("/etc/hosts", containerConfig.GetMounts()) && hostNetwork(containerConfig) {
|
|
// Only bind mount for host netns and when CRI does not give us any hosts file
|
|
mnt = rspec.Mount{
|
|
Type: "bind",
|
|
Source: "/etc/hosts",
|
|
Destination: "/etc/hosts",
|
|
Options: append(options, "bind"),
|
|
}
|
|
specgen.AddMount(mnt)
|
|
}
|
|
|
|
// Set hostname and add env for hostname
|
|
specgen.SetHostname(sb.Hostname())
|
|
specgen.AddProcessEnv("HOSTNAME", sb.Hostname())
|
|
|
|
specgen.AddAnnotation(annotations.Name, containerName)
|
|
specgen.AddAnnotation(annotations.ContainerID, containerID)
|
|
specgen.AddAnnotation(annotations.SandboxID, sb.ID())
|
|
specgen.AddAnnotation(annotations.SandboxName, sb.InfraContainer().Name())
|
|
specgen.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer)
|
|
specgen.AddAnnotation(annotations.LogPath, logPath)
|
|
specgen.AddAnnotation(annotations.TTY, fmt.Sprintf("%v", containerConfig.Tty))
|
|
specgen.AddAnnotation(annotations.Stdin, fmt.Sprintf("%v", containerConfig.Stdin))
|
|
specgen.AddAnnotation(annotations.StdinOnce, fmt.Sprintf("%v", containerConfig.StdinOnce))
|
|
specgen.AddAnnotation(annotations.ResolvPath, sb.InfraContainer().CrioAnnotations()[annotations.ResolvPath])
|
|
|
|
created := time.Now()
|
|
specgen.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano))
|
|
|
|
metadataJSON, err := json.Marshal(metadata)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
specgen.AddAnnotation(annotations.Metadata, string(metadataJSON))
|
|
|
|
labelsJSON, err := json.Marshal(labels)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
specgen.AddAnnotation(annotations.Labels, string(labelsJSON))
|
|
|
|
kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
specgen.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
|
|
|
|
spp := containerConfig.GetLinux().GetSecurityContext().GetSeccompProfilePath()
|
|
if !privileged {
|
|
if err = s.setupSeccomp(&specgen, spp); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
specgen.AddAnnotation(annotations.SeccompProfilePath, spp)
|
|
|
|
metaname := metadata.Name
|
|
attempt := metadata.Attempt
|
|
containerInfo, err := s.StorageRuntimeServer().CreateContainer(s.ImageContext(),
|
|
sb.Name(), sb.ID(),
|
|
image, status.ID,
|
|
containerName, containerID,
|
|
metaname,
|
|
attempt,
|
|
mountLabel,
|
|
nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
err2 := s.StorageRuntimeServer().DeleteContainer(containerInfo.ID)
|
|
if err2 != nil {
|
|
logrus.Warnf("Failed to cleanup container directory: %v", err2)
|
|
}
|
|
}
|
|
}()
|
|
|
|
mountPoint, err := s.StorageRuntimeServer().StartContainer(containerID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to mount container %s(%s): %v", containerName, containerID, err)
|
|
}
|
|
specgen.AddAnnotation(annotations.MountPoint, mountPoint)
|
|
|
|
containerImageConfig := containerInfo.Config
|
|
if containerImageConfig == nil {
|
|
err = fmt.Errorf("empty image config for %s", image)
|
|
return nil, err
|
|
}
|
|
|
|
if containerImageConfig.Config.StopSignal != "" {
|
|
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
|
|
specgen.AddAnnotation("org.opencontainers.image.stopSignal", containerImageConfig.Config.StopSignal)
|
|
}
|
|
|
|
// Add image volumes
|
|
volumeMounts, err := addImageVolumes(mountPoint, s, &containerInfo, &specgen, mountLabel)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
processArgs, err := buildOCIProcessArgs(containerConfig, containerImageConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
specgen.SetProcessArgs(processArgs)
|
|
|
|
envs := mergeEnvs(containerImageConfig, containerConfig.GetEnvs())
|
|
for _, e := range envs {
|
|
parts := strings.SplitN(e, "=", 2)
|
|
specgen.AddProcessEnv(parts[0], parts[1])
|
|
}
|
|
|
|
// Set working directory
|
|
// Pick it up from image config first and override if specified in CRI
|
|
containerCwd := "/"
|
|
if containerImageConfig != nil {
|
|
imageCwd := containerImageConfig.Config.WorkingDir
|
|
if imageCwd != "" {
|
|
containerCwd = imageCwd
|
|
}
|
|
}
|
|
runtimeCwd := containerConfig.WorkingDir
|
|
if runtimeCwd != "" {
|
|
containerCwd = runtimeCwd
|
|
}
|
|
specgen.SetProcessCwd(containerCwd)
|
|
if err := setupWorkingDirectory(mountPoint, mountLabel, containerCwd); err != nil {
|
|
if err1 := s.StorageRuntimeServer().StopContainer(containerID); err1 != nil {
|
|
return nil, fmt.Errorf("can't umount container after cwd error %v: %v", err, err1)
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
var secretMounts []rspec.Mount
|
|
if len(s.config.DefaultMounts) > 0 {
|
|
var err error
|
|
secretMounts, err = addSecretsBindMounts(mountLabel, containerInfo.RunDir, s.config.DefaultMounts, specgen)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to mount secrets: %v", err)
|
|
}
|
|
}
|
|
|
|
mounts := []rspec.Mount{}
|
|
mounts = append(mounts, ociMounts...)
|
|
mounts = append(mounts, volumeMounts...)
|
|
mounts = append(mounts, secretMounts...)
|
|
|
|
sort.Sort(orderedMounts(mounts))
|
|
|
|
for _, m := range mounts {
|
|
mnt = rspec.Mount{
|
|
Type: "bind",
|
|
Source: m.Source,
|
|
Destination: m.Destination,
|
|
Options: append(m.Options, "bind"),
|
|
}
|
|
specgen.AddMount(mnt)
|
|
}
|
|
|
|
if err := s.setupOCIHooks(&specgen, sb, containerConfig, processArgs[0]); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Setup user and groups
|
|
if linux != nil {
|
|
if err = setupContainerUser(&specgen, mountPoint, linux.GetSecurityContext(), containerImageConfig); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// Set up pids limit if pids cgroup is mounted
|
|
_, err = cgroups.FindCgroupMountpoint("pids")
|
|
if err == nil {
|
|
specgen.SetLinuxResourcesPidsLimit(s.config.PidsLimit)
|
|
}
|
|
|
|
// by default, the root path is an empty string. set it now.
|
|
specgen.SetRootPath(mountPoint)
|
|
|
|
saveOptions := generate.ExportOptions{}
|
|
if err = specgen.SaveToFile(filepath.Join(containerInfo.Dir, "config.json"), saveOptions); err != nil {
|
|
return nil, err
|
|
}
|
|
if err = specgen.SaveToFile(filepath.Join(containerInfo.RunDir, "config.json"), saveOptions); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
crioAnnotations := specgen.Spec().Annotations
|
|
|
|
container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
container.SetSpec(specgen.Spec())
|
|
container.SetMountPoint(mountPoint)
|
|
container.SetSeccompProfilePath(spp)
|
|
|
|
for _, cv := range containerVolumes {
|
|
container.AddVolume(cv)
|
|
}
|
|
|
|
return container, nil
|
|
}
|
|
|
|
func (s *Server) setupSeccomp(specgen *generate.Generator, profile string) error {
|
|
if profile == "" {
|
|
// running w/o seccomp, aka unconfined
|
|
specgen.Spec().Linux.Seccomp = nil
|
|
return nil
|
|
}
|
|
if !s.seccompEnabled {
|
|
if profile != seccompUnconfined {
|
|
return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile")
|
|
}
|
|
logrus.Warn("seccomp is not enabled in your kernel, running container without profile")
|
|
}
|
|
if profile == seccompUnconfined {
|
|
// running w/o seccomp, aka unconfined
|
|
specgen.Spec().Linux.Seccomp = nil
|
|
return nil
|
|
}
|
|
if profile == seccompRuntimeDefault || profile == seccompDockerDefault {
|
|
return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen)
|
|
}
|
|
if !strings.HasPrefix(profile, seccompLocalhostPrefix) {
|
|
return fmt.Errorf("unknown seccomp profile option: %q", profile)
|
|
}
|
|
fname := strings.TrimPrefix(profile, "localhost/")
|
|
file, err := ioutil.ReadFile(filepath.FromSlash(fname))
|
|
if err != nil {
|
|
return fmt.Errorf("cannot load seccomp profile %q: %v", fname, err)
|
|
}
|
|
return seccomp.LoadProfileFromBytes(file, specgen)
|
|
}
|
|
|
|
// getAppArmorProfileName gets the profile name for the given container.
|
|
func (s *Server) getAppArmorProfileName(profile string) string {
|
|
if profile == "" {
|
|
return ""
|
|
}
|
|
|
|
if profile == apparmor.ProfileRuntimeDefault {
|
|
// If the value is runtime/default, then return default profile.
|
|
return s.appArmorProfile
|
|
}
|
|
|
|
return strings.TrimPrefix(profile, apparmor.ProfileNamePrefix)
|
|
}
|
|
|
|
// openContainerFile opens a file inside a container rootfs safely
|
|
func openContainerFile(rootfs string, path string) (io.ReadCloser, error) {
|
|
fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, path), rootfs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return os.Open(fp)
|
|
}
|
|
|
|
// getUserInfo returns UID, GID and additional groups for specified user
|
|
// by looking them up in /etc/passwd and /etc/group
|
|
func getUserInfo(rootfs string, userName string) (uint32, uint32, []uint32, error) {
|
|
// We don't care if we can't open the file because
|
|
// not all images will have these files
|
|
passwdFile, err := openContainerFile(rootfs, "/etc/passwd")
|
|
if err != nil {
|
|
logrus.Warnf("Failed to open /etc/passwd: %v", err)
|
|
} else {
|
|
defer passwdFile.Close()
|
|
}
|
|
|
|
groupFile, err := openContainerFile(rootfs, "/etc/group")
|
|
if err != nil {
|
|
logrus.Warnf("Failed to open /etc/group: %v", err)
|
|
} else {
|
|
defer groupFile.Close()
|
|
}
|
|
|
|
execUser, err := user.GetExecUser(userName, nil, passwdFile, groupFile)
|
|
if err != nil {
|
|
return 0, 0, nil, err
|
|
}
|
|
|
|
uid := uint32(execUser.Uid)
|
|
gid := uint32(execUser.Gid)
|
|
var additionalGids []uint32
|
|
for _, g := range execUser.Sgids {
|
|
additionalGids = append(additionalGids, uint32(g))
|
|
}
|
|
|
|
return uid, gid, additionalGids, nil
|
|
}
|
|
|
|
func setOCIBindMountsPrivileged(g *generate.Generator) {
|
|
spec := g.Spec()
|
|
// clear readonly for /sys and cgroup
|
|
for i, m := range spec.Mounts {
|
|
if spec.Mounts[i].Destination == "/sys" && !spec.Root.Readonly {
|
|
clearReadOnly(&spec.Mounts[i])
|
|
}
|
|
if m.Type == "cgroup" {
|
|
clearReadOnly(&spec.Mounts[i])
|
|
}
|
|
}
|
|
spec.Linux.ReadonlyPaths = nil
|
|
spec.Linux.MaskedPaths = nil
|
|
}
|
|
|
|
func clearReadOnly(m *rspec.Mount) {
|
|
var opt []string
|
|
for _, o := range m.Options {
|
|
if o != "ro" {
|
|
opt = append(opt, o)
|
|
}
|
|
}
|
|
m.Options = opt
|
|
}
|
|
|
|
func setupWorkingDirectory(rootfs, mountLabel, containerCwd string) error {
|
|
fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, containerCwd), rootfs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(fp, 0755); err != nil {
|
|
return err
|
|
}
|
|
if mountLabel != "" {
|
|
if err1 := label.Relabel(fp, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP {
|
|
return fmt.Errorf("relabel failed %s: %v", fp, err1)
|
|
}
|
|
}
|
|
return nil
|
|
}
|