2016-11-22 16:49:54 +00:00
package server
import (
"encoding/json"
"errors"
"fmt"
2017-03-29 18:16:53 +00:00
"io"
2017-09-27 18:46:31 +00:00
"io/ioutil"
2017-03-29 18:16:53 +00:00
"os"
2016-11-22 16:49:54 +00:00
"path/filepath"
2017-08-12 10:29:22 +00:00
"regexp"
2017-10-28 21:43:20 +00:00
"sort"
2017-03-29 18:23:33 +00:00
"strconv"
2016-11-23 09:41:48 +00:00
"strings"
2017-05-11 09:22:47 +00:00
"time"
2016-11-22 16:49:54 +00:00
2017-08-14 19:29:53 +00:00
"github.com/docker/distribution/reference"
2017-09-27 12:49:29 +00:00
dockermounts "github.com/docker/docker/pkg/mount"
2017-07-14 22:32:25 +00:00
"github.com/docker/docker/pkg/stringid"
2017-03-29 18:16:53 +00:00
"github.com/docker/docker/pkg/symlink"
2017-07-25 19:16:43 +00:00
"github.com/kubernetes-incubator/cri-o/libkpod"
2017-07-19 19:03:22 +00:00
"github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
2016-11-22 16:49:54 +00:00
"github.com/kubernetes-incubator/cri-o/oci"
2017-06-01 16:40:33 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
2017-07-14 22:32:25 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/storage"
2016-11-29 12:34:15 +00:00
"github.com/kubernetes-incubator/cri-o/server/apparmor"
2016-11-23 09:41:48 +00:00
"github.com/kubernetes-incubator/cri-o/server/seccomp"
2017-03-16 09:59:41 +00:00
"github.com/opencontainers/image-spec/specs-go/v1"
2017-07-07 21:43:35 +00:00
"github.com/opencontainers/runc/libcontainer/cgroups"
2017-05-08 22:10:09 +00:00
"github.com/opencontainers/runc/libcontainer/devices"
2017-03-29 18:18:35 +00:00
"github.com/opencontainers/runc/libcontainer/user"
2017-05-08 22:10:09 +00:00
rspec "github.com/opencontainers/runtime-spec/specs-go"
2016-11-22 16:49:54 +00:00
"github.com/opencontainers/runtime-tools/generate"
2017-03-22 17:58:35 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2017-08-05 11:40:46 +00:00
"github.com/sirupsen/logrus"
2016-11-22 16:49:54 +00:00
"golang.org/x/net/context"
2017-06-28 15:47:31 +00:00
"golang.org/x/sys/unix"
2017-08-04 11:13:19 +00:00
pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
2016-11-22 16:49:54 +00:00
)
2016-11-23 09:41:48 +00:00
const (
	// Seccomp profile identifiers. NOTE(review): presumably compared against
	// the profile requested for a container; the code that consumes them is
	// outside this part of the file.
	seccompUnconfined      = "unconfined"
	seccompRuntimeDefault  = "runtime/default"
	seccompDockerDefault   = "docker/default"
	seccompLocalhostPrefix = "localhost/"

	// scopePrefix is combined with the container ID to name the container's
	// cgroup scope.
	scopePrefix = "crio"
	// defaultCgroupfsParent is the cgroup parent used when the sandbox does
	// not provide one and the systemd cgroup manager is not in use.
	defaultCgroupfsParent = "/crio"
	// defaultSystemdParent is the cgroup parent used when the sandbox does
	// not provide one and the systemd cgroup manager is in use.
	defaultSystemdParent = "system.slice"
)
2017-10-28 21:43:20 +00:00
// orderedMounts is a list of OCI mounts that provides the Len, Less and Swap
// methods needed for sorting. Entries are ordered by how many path separators
// their cleaned destination contains.
type orderedMounts []rspec.Mount

// Len reports how many mounts the list holds. Used in sorting.
func (m orderedMounts) Len() int {
	return len(m)
}

// Less reports whether the destination of the mount at index i has fewer
// parts (a/b/c would be 3 parts) than the destination of the mount at index
// j. Used in sorting.
func (m orderedMounts) Less(i, j int) bool {
	left, right := m.parts(i), m.parts(j)
	return left < right
}

// Swap exchanges the mounts at indexes i and j. Used in sorting.
func (m orderedMounts) Swap(i, j int) {
	first := m[i]
	m[i] = m[j]
	m[j] = first
}

// parts counts the path separators in the cleaned destination of the mount at
// index i. Used in sorting.
func (m orderedMounts) parts(i int) int {
	cleaned := filepath.Clean(m[i].Destination)
	return strings.Count(cleaned, string(os.PathSeparator))
}
func addOCIBindMounts ( mountLabel string , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) ( [ ] oci . ContainerVolume , [ ] rspec . Mount , error ) {
2017-08-14 19:52:25 +00:00
volumes := [ ] oci . ContainerVolume { }
2017-10-28 21:43:20 +00:00
ociMounts := [ ] rspec . Mount { }
2017-02-22 18:42:44 +00:00
mounts := containerConfig . GetMounts ( )
for _ , mount := range mounts {
dest := mount . ContainerPath
if dest == "" {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Mount.ContainerPath is empty" )
2017-02-22 18:42:44 +00:00
}
src := mount . HostPath
if src == "" {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Mount.HostPath is empty" )
2017-02-22 18:42:44 +00:00
}
2017-06-01 14:09:39 +00:00
if _ , err := os . Stat ( src ) ; err != nil && os . IsNotExist ( err ) {
2017-06-16 22:49:16 +00:00
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Failed to mkdir %s: %s" , src , err )
2017-06-16 22:49:16 +00:00
}
2017-06-01 14:09:39 +00:00
}
2017-09-18 13:53:48 +00:00
src , err := resolveSymbolicLink ( src )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "failed to resolve symlink %q: %v" , src , err )
2017-09-18 13:53:48 +00:00
}
2017-02-22 18:42:44 +00:00
options := [ ] string { "rw" }
if mount . Readonly {
options = [ ] string { "ro" }
}
2017-10-20 22:17:15 +00:00
options = append ( options , "rbind" )
2017-02-22 18:42:44 +00:00
2017-09-27 12:49:29 +00:00
// mount propagation
mountInfos , err := dockermounts . GetMounts ( )
if err != nil {
return nil , nil , err
}
switch mount . GetPropagation ( ) {
case pb . MountPropagation_PROPAGATION_PRIVATE :
options = append ( options , "rprivate" )
// Since default root propagation in runc is rprivate ignore
// setting the root propagation
case pb . MountPropagation_PROPAGATION_BIDIRECTIONAL :
if err := ensureShared ( src , mountInfos ) ; err != nil {
return nil , nil , err
}
options = append ( options , "rshared" )
specgen . SetLinuxRootPropagation ( "rshared" )
case pb . MountPropagation_PROPAGATION_HOST_TO_CONTAINER :
if err := ensureSharedOrSlave ( src , mountInfos ) ; err != nil {
return nil , nil , err
}
options = append ( options , "rslave" )
if specgen . Spec ( ) . Linux . RootfsPropagation != "rshared" &&
specgen . Spec ( ) . Linux . RootfsPropagation != "rslave" {
specgen . SetLinuxRootPropagation ( "rslave" )
}
default :
logrus . Warnf ( "Unknown propagation mode for hostPath %q" , mount . HostPath )
options = append ( options , "rprivate" )
}
2017-02-22 18:42:44 +00:00
if mount . SelinuxRelabel {
// Need a way in kubernetes to determine if the volume is shared or private
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( src , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "relabel failed %s: %v" , src , err )
2017-02-22 18:42:44 +00:00
}
}
2017-08-14 19:52:25 +00:00
volumes = append ( volumes , oci . ContainerVolume {
ContainerPath : dest ,
HostPath : src ,
Readonly : mount . Readonly ,
} )
2017-10-28 21:43:20 +00:00
ociMounts = append ( ociMounts , rspec . Mount {
Source : src ,
Destination : dest ,
Options : options ,
} )
2017-02-22 18:42:44 +00:00
}
2017-10-28 21:43:20 +00:00
return volumes , ociMounts , nil
2017-02-22 18:42:44 +00:00
}
2017-09-27 12:49:29 +00:00
// ensureShared verifies that the mount point on which path is mounted is a
// shared mount. It returns an error when the source mount cannot be found or
// when none of its optional fields starts with "shared:".
func ensureShared(path string, mountInfos []*dockermounts.Info) error {
	sourceMount, optionalOpts, err := getSourceMount(path, mountInfos)
	if err != nil {
		return err
	}

	// Look for a "shared:" entry among the space-separated optional fields
	// of the source mount.
	shared := false
	for _, field := range strings.Split(optionalOpts, " ") {
		if strings.HasPrefix(field, "shared:") {
			shared = true
			break
		}
	}
	if !shared {
		return fmt.Errorf("path %q is mounted on %q but it is not a shared mount", path, sourceMount)
	}
	return nil
}
// ensureSharedOrSlave verifies that the mount point on which path is mounted
// is either a shared or a slave mount. It returns an error when the source
// mount cannot be found or when none of its optional fields starts with
// "shared:" or "master:".
func ensureSharedOrSlave(path string, mountInfos []*dockermounts.Info) error {
	sourceMount, optionalOpts, err := getSourceMount(path, mountInfos)
	if err != nil {
		return err
	}

	// Make sure the source mount point is shared ("shared:") or slave
	// ("master:").
	for _, field := range strings.Split(optionalOpts, " ") {
		if strings.HasPrefix(field, "shared:") || strings.HasPrefix(field, "master:") {
			return nil
		}
	}

	return fmt.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, sourceMount)
}
// getMountInfo returns the first entry of mountInfos whose mount point equals
// dir, or nil when no entry matches.
func getMountInfo(mountInfos []*dockermounts.Info, dir string) *dockermounts.Info {
	for idx := 0; idx < len(mountInfos); idx++ {
		if candidate := mountInfos[idx]; candidate.Mountpoint == dir {
			return candidate
		}
	}
	return nil
}
// getSourceMount finds the mount point that source lives on by checking
// source itself and then each of its parent directories against mountInfos.
//
// It returns the matching mount point, that mount's optional fields, and a
// nil error. If no ancestor is a known mount point it returns an error.
func getSourceMount(source string, mountInfos []*dockermounts.Info) (string, string, error) {
	path := source
	for {
		if mountinfo := getMountInfo(mountInfos, path); mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		// Stop once filepath.Dir no longer makes progress. That happens at
		// "/" for absolute paths and at "." for relative or empty ones;
		// checking only for "/" would loop forever on the latter.
		parent := filepath.Dir(path)
		if parent == path {
			break
		}
		path = parent
	}

	// If we are here, we did not find parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}
2017-10-28 21:43:20 +00:00
func addImageVolumes ( rootfs string , s * Server , containerInfo * storage . ContainerInfo , specgen * generate . Generator , mountLabel string ) ( [ ] rspec . Mount , error ) {
mounts := [ ] rspec . Mount { }
2017-07-14 22:32:25 +00:00
for dest := range containerInfo . Config . Config . Volumes {
fp , err := symlink . FollowSymlinkInScope ( filepath . Join ( rootfs , dest ) , rootfs )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , err
2017-07-14 22:32:25 +00:00
}
switch s . config . ImageVolumes {
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesMkdir :
2017-07-14 22:32:25 +00:00
if err1 := os . MkdirAll ( fp , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , err1
2017-07-14 22:32:25 +00:00
}
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesBind :
2017-07-14 22:32:25 +00:00
volumeDirName := stringid . GenerateNonCryptoID ( )
src := filepath . Join ( containerInfo . RunDir , "mounts" , volumeDirName )
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , err1
2017-07-14 22:32:25 +00:00
}
// Label the source with the sandbox selinux mount label
if mountLabel != "" {
if err1 := label . Relabel ( src , mountLabel , true ) ; err1 != nil && err1 != unix . ENOTSUP {
2017-10-28 21:43:20 +00:00
return nil , fmt . Errorf ( "relabel failed %s: %v" , src , err1 )
2017-07-14 22:32:25 +00:00
}
}
logrus . Debugf ( "Adding bind mounted volume: %s to %s" , src , dest )
2017-10-28 21:43:20 +00:00
mounts = append ( mounts , rspec . Mount {
Source : src ,
Destination : dest ,
Options : [ ] string { "rw" } ,
} )
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesIgnore :
2017-07-14 22:32:25 +00:00
logrus . Debugf ( "Ignoring volume %v" , dest )
default :
logrus . Fatalf ( "Unrecognized image volumes setting" )
}
}
2017-10-28 21:43:20 +00:00
return mounts , nil
2017-07-14 22:32:25 +00:00
}
2017-09-06 15:04:18 +00:00
// resolveSymbolicLink resolves a possible symlink path. When path is a
// symlink the fully evaluated target is returned; when it is anything else
// the path is returned unchanged. An error (with an empty path) is returned
// if path cannot be inspected.
func resolveSymbolicLink(path string) (string, error) {
	fi, lstatErr := os.Lstat(path)
	switch {
	case lstatErr != nil:
		return "", lstatErr
	case fi.Mode()&os.ModeSymlink == 0:
		// Not a symlink: nothing to resolve.
		return path, nil
	default:
		return filepath.EvalSymlinks(path)
	}
}
2017-07-19 19:03:22 +00:00
func addDevices ( sb * sandbox . Sandbox , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) error {
2017-05-08 22:10:09 +00:00
sp := specgen . Spec ( )
2017-10-09 19:53:54 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetPrivileged ( ) {
2017-09-06 15:04:18 +00:00
hostDevices , err := devices . HostDevices ( )
if err != nil {
return err
}
for _ , hostDevice := range hostDevices {
rd := rspec . LinuxDevice {
Path : hostDevice . Path ,
Type : string ( hostDevice . Type ) ,
Major : hostDevice . Major ,
Minor : hostDevice . Minor ,
UID : & hostDevice . Uid ,
GID : & hostDevice . Gid ,
}
if hostDevice . Major == 0 && hostDevice . Minor == 0 {
// Invalid device, most likely a symbolic link, skip it.
continue
}
specgen . AddDevice ( rd )
}
sp . Linux . Resources . Devices = [ ] rspec . LinuxDeviceCgroup {
{
Allow : true ,
Access : "rwm" ,
} ,
}
return nil
}
2017-05-08 22:10:09 +00:00
for _ , device := range containerConfig . GetDevices ( ) {
2017-09-06 15:04:18 +00:00
path , err := resolveSymbolicLink ( device . HostPath )
2017-05-08 22:10:09 +00:00
if err != nil {
2017-09-06 15:04:18 +00:00
return err
2017-05-08 22:10:09 +00:00
}
2017-09-06 15:04:18 +00:00
dev , err := devices . DeviceFromPath ( path , device . Permissions )
// if there was no error, return the device
if err == nil {
rd := rspec . LinuxDevice {
Path : device . ContainerPath ,
Type : string ( dev . Type ) ,
Major : dev . Major ,
Minor : dev . Minor ,
UID : & dev . Uid ,
GID : & dev . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( dev . Type ) ,
Major : & dev . Major ,
Minor : & dev . Minor ,
Access : dev . Permissions ,
} )
continue
}
// if the device is not a device node
// try to see if it's a directory holding many devices
if err == devices . ErrNotADevice {
// check if it is a directory
if src , e := os . Stat ( path ) ; e == nil && src . IsDir ( ) {
// mount the internal devices recursively
filepath . Walk ( path , func ( dpath string , f os . FileInfo , e error ) error {
childDevice , e := devices . DeviceFromPath ( dpath , device . Permissions )
if e != nil {
// ignore the device
return nil
}
cPath := strings . Replace ( dpath , path , device . ContainerPath , 1 )
rd := rspec . LinuxDevice {
Path : cPath ,
Type : string ( childDevice . Type ) ,
Major : childDevice . Major ,
Minor : childDevice . Minor ,
UID : & childDevice . Uid ,
GID : & childDevice . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( childDevice . Type ) ,
Major : & childDevice . Major ,
Minor : & childDevice . Minor ,
Access : childDevice . Permissions ,
} )
return nil
} )
}
2017-05-08 22:10:09 +00:00
}
}
return nil
}
2017-03-16 09:59:41 +00:00
// buildOCIProcessArgs build an OCI compatible process arguments slice.
func buildOCIProcessArgs ( containerKubeConfig * pb . ContainerConfig , imageOCIConfig * v1 . Image ) ( [ ] string , error ) {
2017-06-07 16:38:04 +00:00
//# Start the nginx container using the default command, but use custom
//arguments (arg1 .. argN) for that command.
//kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN>
//# Start the nginx container using a different command and custom arguments.
//kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN>
2017-03-16 09:59:41 +00:00
kubeCommands := containerKubeConfig . Command
kubeArgs := containerKubeConfig . Args
2017-06-07 16:38:04 +00:00
// merge image config and kube config
// same as docker does today...
if imageOCIConfig != nil {
if len ( kubeCommands ) == 0 {
if len ( kubeArgs ) == 0 {
kubeArgs = imageOCIConfig . Config . Cmd
}
if kubeCommands == nil {
kubeCommands = imageOCIConfig . Config . Entrypoint
}
}
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
if len ( kubeCommands ) == 0 && len ( kubeArgs ) == 0 {
return nil , fmt . Errorf ( "no command specified" )
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
// create entrypoint and args
var entrypoint string
var args [ ] string
if len ( kubeCommands ) != 0 {
entrypoint = kubeCommands [ 0 ]
args = append ( kubeCommands [ 1 : ] , kubeArgs ... )
2017-03-16 09:59:41 +00:00
} else {
2017-06-07 16:38:04 +00:00
entrypoint = kubeArgs [ 0 ]
args = kubeArgs [ 1 : ]
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
processArgs := append ( [ ] string { entrypoint } , args ... )
2017-03-16 09:59:41 +00:00
logrus . Debugf ( "OCI process args %v" , processArgs )
return processArgs , nil
}
2017-08-12 10:29:22 +00:00
// addOCIHook registers hook on the spec for each of the stages it lists.
// Recognized stages are "prestart", "poststart" and "poststop"; the hook
// program is invoked with its own path and the stage name as arguments.
// Unrecognized stage names are skipped. The returned error is always nil.
func addOCIHook(specgen *generate.Generator, hook libkpod.HookParams) error {
	// Debugf needs a verb for hook; without one the value is rendered as a
	// "%!(EXTRA ...)" formatting error.
	logrus.Debugf("AddOCIHook %+v", hook)
	for _, stage := range hook.Stage {
		switch stage {
		case "prestart":
			specgen.AddPreStartHook(hook.Hook, []string{hook.Hook, "prestart"})

		case "poststart":
			specgen.AddPostStartHook(hook.Hook, []string{hook.Hook, "poststart"})

		case "poststop":
			specgen.AddPostStopHook(hook.Hook, []string{hook.Hook, "poststop"})
		}
	}
	return nil
}
2017-03-29 18:23:33 +00:00
// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config
func setupContainerUser ( specgen * generate . Generator , rootfs string , sc * pb . LinuxContainerSecurityContext , imageConfig * v1 . Image ) error {
if sc != nil {
containerUser := ""
// Case 1: run as user is set by kubelet
2017-09-13 11:06:05 +00:00
if sc . GetRunAsUser ( ) != nil {
2017-03-29 18:23:33 +00:00
containerUser = strconv . FormatInt ( sc . GetRunAsUser ( ) . Value , 10 )
} else {
// Case 2: run as username is set by kubelet
2017-09-13 11:06:05 +00:00
userName := sc . GetRunAsUsername ( )
2017-03-29 18:23:33 +00:00
if userName != "" {
containerUser = userName
} else {
// Case 3: get user from image config
2017-04-04 22:39:59 +00:00
if imageConfig != nil {
imageUser := imageConfig . Config . User
if imageUser != "" {
containerUser = imageUser
}
2017-03-29 18:23:33 +00:00
}
}
}
logrus . Debugf ( "CONTAINER USER: %+v" , containerUser )
// Add uid, gid and groups from user
uid , gid , addGroups , err1 := getUserInfo ( rootfs , containerUser )
if err1 != nil {
return err1
}
logrus . Debugf ( "UID: %v, GID: %v, Groups: %+v" , uid , gid , addGroups )
specgen . SetProcessUID ( uid )
specgen . SetProcessGID ( gid )
for _ , group := range addGroups {
specgen . AddProcessAdditionalGid ( group )
}
// Add groups from CRI
2017-09-13 11:06:05 +00:00
groups := sc . GetSupplementalGroups ( )
2017-03-29 18:23:33 +00:00
for _ , group := range groups {
specgen . AddProcessAdditionalGid ( uint32 ( group ) )
}
}
return nil
}
2017-04-22 00:54:29 +00:00
// hostNetwork reports the HostNetwork flag from the namespace options of the
// container's Linux security context. It returns false when either the
// security context or its namespace options are absent.
func hostNetwork(containerConfig *pb.ContainerConfig) bool {
	if sc := containerConfig.GetLinux().GetSecurityContext(); sc != nil {
		if nsOptions := sc.GetNamespaceOptions(); nsOptions != nil {
			return nsOptions.HostNetwork
		}
	}
	return false
}
2017-04-04 14:11:53 +00:00
// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
// logPath to be a broken symlink to some magical Docker path. Ideally we
// wouldn't have to deal with this, but until that issue is fixed we have to
// remove the path if it's a broken symlink.
//
// It returns an error only when a broken symlink was found and could not be
// removed; missing paths, non-symlinks and valid symlinks are left alone.
func ensureSaneLogPath(logPath string) error {
	// If the path exists but the resolved path does not, then we have a broken
	// symlink and we need to remove it.
	fi, err := os.Lstat(logPath)
	if err != nil || fi.Mode()&os.ModeSymlink == 0 {
		// Non-existent files and non-symlinks aren't our problem.
		return nil
	}

	// Stat follows the link, so "not exist" here means the target is gone.
	if _, err = os.Stat(logPath); os.IsNotExist(err) {
		if err = os.RemoveAll(logPath); err != nil {
			return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", err)
		}
	}
	return nil
}
2017-09-22 15:10:15 +00:00
// addSecretsBindMounts mounts user defined secrets to the container
2017-10-28 21:43:20 +00:00
func addSecretsBindMounts ( mountLabel , ctrRunDir string , defaultMounts [ ] string , specgen generate . Generator ) ( [ ] rspec . Mount , error ) {
2017-10-12 18:14:42 +00:00
containerMounts := specgen . Spec ( ) . Mounts
mounts , err := secretMounts ( defaultMounts , mountLabel , ctrRunDir , containerMounts )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , err
2017-09-22 15:10:15 +00:00
}
2017-10-28 21:43:20 +00:00
return mounts , nil
2017-09-22 15:10:15 +00:00
}
2016-11-22 16:49:54 +00:00
// CreateContainer creates a new container in specified PodSandbox
func ( s * Server ) CreateContainer ( ctx context . Context , req * pb . CreateContainerRequest ) ( res * pb . CreateContainerResponse , err error ) {
2017-11-09 10:10:35 +00:00
const operation = "create_container"
defer func ( ) {
recordOperation ( operation , time . Now ( ) )
recordError ( operation , err )
} ( )
2016-11-22 16:49:54 +00:00
logrus . Debugf ( "CreateContainerRequest %+v" , req )
2017-04-04 15:24:55 +00:00
s . updateLock . RLock ( )
defer s . updateLock . RUnlock ( )
2017-02-03 14:41:28 +00:00
sbID := req . PodSandboxId
2016-11-22 16:49:54 +00:00
if sbID == "" {
return nil , fmt . Errorf ( "PodSandboxId should not be empty" )
}
2017-07-25 15:36:33 +00:00
sandboxID , err := s . PodIDIndex ( ) . Get ( sbID )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , fmt . Errorf ( "PodSandbox with ID starting with %s not found: %v" , sbID , err )
}
sb := s . getSandbox ( sandboxID )
if sb == nil {
return nil , fmt . Errorf ( "specified sandbox not found: %s" , sandboxID )
}
// The config of the container
containerConfig := req . GetConfig ( )
if containerConfig == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig is nil" )
}
2017-02-03 14:41:28 +00:00
name := containerConfig . GetMetadata ( ) . Name
2016-11-22 16:49:54 +00:00
if name == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Name is empty" )
}
2017-07-19 19:03:22 +00:00
containerID , containerName , err := s . generateContainerIDandName ( sb . Metadata ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
2017-07-20 17:10:16 +00:00
s . ReleaseContainerName ( containerName )
2016-11-22 16:49:54 +00:00
}
} ( )
2016-10-18 14:48:33 +00:00
container , err := s . createSandboxContainer ( ctx , containerID , containerName , sb , req . GetSandboxConfig ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2016-10-18 14:48:33 +00:00
defer func ( ) {
if err != nil {
2017-07-31 18:38:45 +00:00
err2 := s . StorageRuntimeServer ( ) . DeleteContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err2 != nil {
logrus . Warnf ( "Failed to cleanup container directory: %v" , err2 )
}
}
} ( )
2016-11-22 16:49:54 +00:00
2017-07-19 19:03:22 +00:00
if err = s . Runtime ( ) . CreateContainer ( container , sb . CgroupParent ( ) ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
s . addContainer ( container )
2017-07-17 12:25:32 +00:00
if err = s . CtrIDIndex ( ) . Add ( containerID ) ; err != nil {
2016-11-22 16:49:54 +00:00
s . removeContainer ( container )
return nil , err
}
2017-07-20 17:05:12 +00:00
s . ContainerStateToDisk ( container )
2017-05-11 10:03:59 +00:00
2016-11-22 16:49:54 +00:00
resp := & pb . CreateContainerResponse {
2017-02-03 14:41:28 +00:00
ContainerId : containerID ,
2016-11-22 16:49:54 +00:00
}
logrus . Debugf ( "CreateContainerResponse: %+v" , resp )
return resp , nil
}
2017-08-12 10:29:22 +00:00
// setupOCIHooks adds the server's configured hooks that apply to this
// container to the spec. A hook applies when any of the following holds:
//
//   - it is flagged as wanting bind mounts and the container config has at
//     least one mount,
//   - one of its command patterns matches command,
//   - one of its annotation patterns matches a value yielded by ranging over
//     the sandbox annotations.
//
// Patterns are regular expressions; an invalid pattern is logged and skipped.
// Each hook is added at most once, keyed by its hook path.
func (s *Server) setupOCIHooks(specgen *generate.Generator, sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, command string) error {
	mounts := containerConfig.GetMounts()
	addedHooks := map[string]struct{}{}
	addHook := func(hook libkpod.HookParams) error {
		// Only add a hook once
		if _, ok := addedHooks[hook.Hook]; !ok {
			if err := addOCIHook(specgen, hook); err != nil {
				return err
			}
			addedHooks[hook.Hook] = struct{}{}
		}
		return nil
	}
	for _, hook := range s.Hooks() {
		// Debugf needs a verb for hook; without one the value is rendered as
		// a "%!(EXTRA ...)" formatting error.
		logrus.Debugf("SetupOCIHooks %+v", hook)
		if hook.HasBindMounts && len(mounts) > 0 {
			if err := addHook(hook); err != nil {
				return err
			}
			continue
		}
		for _, cmd := range hook.Cmds {
			match, err := regexp.MatchString(cmd, command)
			if err != nil {
				logrus.Errorf("Invalid regex %q:%q", cmd, err)
				continue
			}
			if match {
				if err := addHook(hook); err != nil {
					return err
				}
			}
		}
		for _, annotationRegex := range hook.Annotations {
			for _, annotation := range sb.Annotations() {
				match, err := regexp.MatchString(annotationRegex, annotation)
				if err != nil {
					logrus.Errorf("Invalid regex %q:%q", annotationRegex, err)
					continue
				}
				if match {
					if err := addHook(hook); err != nil {
						return err
					}
				}
			}
		}
	}
	return nil
}
2017-07-19 19:03:22 +00:00
func ( s * Server ) createSandboxContainer ( ctx context . Context , containerID string , containerName string , sb * sandbox . Sandbox , SandboxConfig * pb . PodSandboxConfig , containerConfig * pb . ContainerConfig ) ( * oci . Container , error ) {
2016-11-22 16:49:54 +00:00
if sb == nil {
return nil , errors . New ( "createSandboxContainer needs a sandbox" )
}
2016-10-18 14:48:33 +00:00
2017-02-03 14:41:28 +00:00
// TODO: simplify this function (cyclomatic complexity here is high)
2016-10-18 14:48:33 +00:00
// TODO: factor generating/updating the spec into something other projects can vendor
2016-11-22 16:49:54 +00:00
// creates a spec Generator with the default spec.
specgen := generate . New ( )
2017-05-30 15:04:21 +00:00
specgen . HostSpecific = true
2017-07-03 21:49:34 +00:00
specgen . ClearProcessRlimits ( )
2016-11-22 16:49:54 +00:00
2017-09-20 13:19:58 +00:00
var readOnlyRootfs bool
var privileged bool
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil {
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
privileged = true
}
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . ReadonlyRootfs {
readOnlyRootfs = true
specgen . SetRootReadonly ( true )
}
}
2017-09-13 19:06:54 +00:00
mountLabel := sb . MountLabel ( )
processLabel := sb . ProcessLabel ( )
selinuxConfig := containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetSelinuxOptions ( )
if selinuxConfig != nil {
var err error
2017-09-20 13:19:58 +00:00
processLabel , mountLabel , err = getSELinuxLabels ( selinuxConfig , privileged )
2017-09-13 19:06:54 +00:00
if err != nil {
return nil , err
}
}
2017-10-28 21:43:20 +00:00
containerVolumes , ociMounts , err := addOCIBindMounts ( mountLabel , containerConfig , & specgen )
2017-08-14 19:52:25 +00:00
if err != nil {
return nil , err
}
volumesJSON , err := json . Marshal ( containerVolumes )
if err != nil {
2017-02-22 18:42:44 +00:00
return nil , err
2016-11-22 16:49:54 +00:00
}
2017-08-14 19:52:25 +00:00
specgen . AddAnnotation ( annotations . Volumes , string ( volumesJSON ) )
2016-11-22 16:49:54 +00:00
2017-07-07 23:32:37 +00:00
// Add cgroup mount so container process can introspect its own limits
specgen . AddCgroupsMount ( "ro" )
2017-05-08 22:11:36 +00:00
if err := addDevices ( sb , containerConfig , & specgen ) ; err != nil {
return nil , err
}
2016-11-22 16:49:54 +00:00
labels := containerConfig . GetLabels ( )
metadata := containerConfig . GetMetadata ( )
2017-06-01 16:40:33 +00:00
kubeAnnotations := containerConfig . GetAnnotations ( )
if kubeAnnotations != nil {
for k , v := range kubeAnnotations {
2016-11-22 16:49:54 +00:00
specgen . AddAnnotation ( k , v )
}
}
2017-09-04 16:11:32 +00:00
if labels != nil {
for k , v := range labels {
specgen . AddAnnotation ( k , v )
}
}
2016-11-24 13:27:56 +00:00
// set this container's apparmor profile if it is set by sandbox
2017-09-06 11:25:19 +00:00
if s . appArmorEnabled && ! privileged {
2017-07-19 19:03:22 +00:00
appArmorProfileName := s . getAppArmorProfileName ( sb . Annotations ( ) , metadata . Name )
2016-11-29 12:34:15 +00:00
if appArmorProfileName != "" {
2016-12-12 07:55:17 +00:00
// reload default apparmor profile if it is unloaded.
if s . appArmorProfile == apparmor . DefaultApparmorProfile {
if err := apparmor . EnsureDefaultApparmorProfile ( ) ; err != nil {
return nil , err
}
}
2016-11-29 12:34:15 +00:00
specgen . SetProcessApparmorProfile ( appArmorProfileName )
}
2016-11-24 13:27:56 +00:00
}
2016-11-22 16:49:54 +00:00
2017-02-03 14:41:28 +00:00
logPath := containerConfig . LogPath
2016-10-07 15:59:39 +00:00
if logPath == "" {
// TODO: Should we use sandboxConfig.GetLogDirectory() here?
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , containerID + ".log" )
2016-10-07 15:59:39 +00:00
}
if ! filepath . IsAbs ( logPath ) {
// XXX: It's not really clear what this should be versus the sbox logDirectory.
logrus . Warnf ( "requested logPath for ctr id %s is a relative path: %s" , containerID , logPath )
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , logPath )
2016-10-07 15:59:39 +00:00
}
2017-04-04 14:11:53 +00:00
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath ( logPath ) ; err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
logrus . WithFields ( logrus . Fields {
2017-07-19 19:03:22 +00:00
"sbox.logdir" : sb . LogDir ( ) ,
2016-10-07 15:59:39 +00:00
"ctr.logfile" : containerConfig . LogPath ,
"log_path" : logPath ,
} ) . Debugf ( "setting container's log_path" )
2017-02-03 14:41:28 +00:00
specgen . SetProcessTerminal ( containerConfig . Tty )
2017-09-06 14:38:01 +00:00
if containerConfig . Tty {
specgen . AddProcessEnv ( "TERM" , "xterm" )
}
2016-11-22 16:49:54 +00:00
linux := containerConfig . GetLinux ( )
if linux != nil {
resources := linux . GetResources ( )
if resources != nil {
2017-02-03 14:41:28 +00:00
cpuPeriod := resources . CpuPeriod
2016-11-22 16:49:54 +00:00
if cpuPeriod != 0 {
specgen . SetLinuxResourcesCPUPeriod ( uint64 ( cpuPeriod ) )
}
2017-02-03 14:41:28 +00:00
cpuQuota := resources . CpuQuota
2016-11-22 16:49:54 +00:00
if cpuQuota != 0 {
2017-04-12 23:12:04 +00:00
specgen . SetLinuxResourcesCPUQuota ( cpuQuota )
2016-11-22 16:49:54 +00:00
}
2017-02-03 14:41:28 +00:00
cpuShares := resources . CpuShares
2016-11-22 16:49:54 +00:00
if cpuShares != 0 {
specgen . SetLinuxResourcesCPUShares ( uint64 ( cpuShares ) )
}
2017-02-03 14:41:28 +00:00
memoryLimit := resources . MemoryLimitInBytes
2016-11-22 16:49:54 +00:00
if memoryLimit != 0 {
2017-07-20 04:07:01 +00:00
specgen . SetLinuxResourcesMemoryLimit ( memoryLimit )
2016-11-22 16:49:54 +00:00
}
2017-02-03 14:41:28 +00:00
oomScoreAdj := resources . OomScoreAdj
2017-07-20 04:07:01 +00:00
specgen . SetProcessOOMScoreAdj ( int ( oomScoreAdj ) )
2016-11-22 16:49:54 +00:00
}
2017-08-29 15:52:05 +00:00
var cgPath string
2017-08-29 21:11:30 +00:00
parent := defaultCgroupfsParent
useSystemd := s . config . CgroupManager == oci . SystemdCgroupsManager
2017-08-29 15:52:05 +00:00
if useSystemd {
2017-08-29 21:11:30 +00:00
parent = defaultSystemdParent
2017-08-29 15:52:05 +00:00
}
2017-07-19 19:03:22 +00:00
if sb . CgroupParent ( ) != "" {
2017-08-29 15:52:05 +00:00
parent = sb . CgroupParent ( )
}
if useSystemd {
cgPath = parent + ":" + scopePrefix + ":" + containerID
} else {
cgPath = filepath . Join ( parent , scopePrefix + "-" + containerID )
2016-12-13 08:34:55 +00:00
}
2017-08-29 15:52:05 +00:00
specgen . SetLinuxCgroupsPath ( cgPath )
2016-12-13 08:34:55 +00:00
2016-11-22 16:49:54 +00:00
capabilities := linux . GetSecurityContext ( ) . GetCapabilities ( )
2017-09-06 11:25:19 +00:00
if privileged {
// this is setting correct capabilities as well for privileged mode
specgen . SetupPrivileged ( true )
2017-09-20 13:19:58 +00:00
setOCIBindMountsPrivileged ( & specgen )
2017-09-06 11:25:19 +00:00
} else {
toCAPPrefixed := func ( cap string ) string {
if ! strings . HasPrefix ( strings . ToLower ( cap ) , "cap_" ) {
return "CAP_" + strings . ToUpper ( cap )
}
return cap
2017-05-05 10:14:34 +00:00
}
2017-09-06 11:25:19 +00:00
// Add/drop all capabilities if "all" is specified, so that
// following individual add/drop could still work. E.g.
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
// will be all capabilities without `CAP_CHOWN`.
// see https://github.com/kubernetes/kubernetes/issues/51980
if inStringSlice ( capabilities . GetAddCapabilities ( ) , "ALL" ) {
for _ , c := range getOCICapabilitiesList ( ) {
if err := specgen . AddProcessCapability ( c ) ; err != nil {
return nil , err
}
}
}
if inStringSlice ( capabilities . GetDropCapabilities ( ) , "ALL" ) {
for _ , c := range getOCICapabilitiesList ( ) {
if err := specgen . DropProcessCapability ( c ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
}
}
2017-09-06 11:25:19 +00:00
if capabilities != nil {
for _ , cap := range capabilities . GetAddCapabilities ( ) {
if strings . ToUpper ( cap ) == "ALL" {
continue
}
if err := specgen . AddProcessCapability ( toCAPPrefixed ( cap ) ) ; err != nil {
return nil , err
}
}
for _ , cap := range capabilities . GetDropCapabilities ( ) {
if strings . ToUpper ( cap ) == "ALL" {
continue
}
2017-05-05 10:14:34 +00:00
if err := specgen . DropProcessCapability ( toCAPPrefixed ( cap ) ) ; err != nil {
2017-09-06 11:25:19 +00:00
return nil , fmt . Errorf ( "failed to drop cap %s %v" , toCAPPrefixed ( cap ) , err )
2016-11-22 16:49:54 +00:00
}
}
}
}
2017-09-20 13:19:58 +00:00
specgen . SetProcessSelinuxLabel ( processLabel )
specgen . SetLinuxMountLabel ( mountLabel )
2017-09-27 12:34:33 +00:00
specgen . SetProcessNoNewPrivileges ( linux . GetSecurityContext ( ) . GetNoNewPrivs ( ) )
2016-11-22 16:49:54 +00:00
2017-05-31 00:03:54 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil &&
2017-06-16 22:49:16 +00:00
! containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
2017-05-31 00:03:54 +00:00
for _ , mp := range [ ] string {
"/proc/kcore" ,
"/proc/latency_stats" ,
"/proc/timer_list" ,
"/proc/timer_stats" ,
"/proc/sched_debug" ,
"/sys/firmware" ,
} {
specgen . AddLinuxMaskedPaths ( mp )
}
2017-05-12 10:47:40 +00:00
2017-05-31 00:03:54 +00:00
for _ , rp := range [ ] string {
"/proc/asound" ,
"/proc/bus" ,
"/proc/fs" ,
"/proc/irq" ,
"/proc/sys" ,
"/proc/sysrq-trigger" ,
} {
specgen . AddLinuxReadonlyPaths ( rp )
}
2017-05-12 10:47:40 +00:00
}
2016-11-22 16:49:54 +00:00
}
// Join the namespace paths for the pod sandbox container.
2017-07-19 19:03:22 +00:00
podInfraState := s . Runtime ( ) . ContainerStatus ( sb . InfraContainer ( ) )
2016-11-22 16:49:54 +00:00
logrus . Debugf ( "pod container state %+v" , podInfraState )
2016-11-23 17:16:21 +00:00
ipcNsPath := fmt . Sprintf ( "/proc/%d/ns/ipc" , podInfraState . Pid )
2017-10-19 19:12:55 +00:00
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . IPCNamespace ) , ipcNsPath ) ; err != nil {
2016-11-23 17:16:21 +00:00
return nil , err
}
2017-10-19 19:12:55 +00:00
utsNsPath := fmt . Sprintf ( "/proc/%d/ns/uts" , podInfraState . Pid )
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . UTSNamespace ) , utsNsPath ) ; err != nil {
return nil , err
}
// Do not share pid ns for now
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . GetHostPid ( ) {
specgen . RemoveLinuxNamespace ( string ( rspec . PIDNamespace ) )
}
2017-07-18 20:35:15 +00:00
netNsPath := sb . NetNsPath ( )
2016-11-23 17:16:21 +00:00
if netNsPath == "" {
// The sandbox does not have a permanent namespace,
// it's on the host one.
netNsPath = fmt . Sprintf ( "/proc/%d/ns/net" , podInfraState . Pid )
}
2017-10-19 19:12:55 +00:00
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . NetworkNamespace ) , netNsPath ) ; err != nil {
2016-11-23 17:16:21 +00:00
return nil , err
2016-11-22 16:49:54 +00:00
}
2016-12-12 10:12:03 +00:00
imageSpec := containerConfig . GetImage ( )
if imageSpec == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image is nil" )
}
2017-02-03 14:41:28 +00:00
image := imageSpec . Image
2016-12-12 10:12:03 +00:00
if image == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image.Image is empty" )
}
2017-07-20 08:01:23 +00:00
images , err := s . StorageImageServer ( ) . ResolveNames ( image )
if err != nil {
// This means we got an image ID
if strings . Contains ( err . Error ( ) , "cannot specify 64-byte hexadecimal strings" ) {
images = append ( images , image )
} else {
return nil , err
}
}
image = images [ 0 ]
2016-12-12 10:12:03 +00:00
2017-08-14 19:29:53 +00:00
// Get imageName and imageRef that are requested in container status
imageName := image
status , err := s . StorageImageServer ( ) . ImageStatus ( s . ImageContext ( ) , image )
if err != nil {
return nil , err
}
imageRef := status . ID
//
// TODO: https://github.com/kubernetes-incubator/cri-o/issues/531
//
//for _, n := range status.Names {
//r, err := reference.ParseNormalizedNamed(n)
//if err != nil {
//return nil, fmt.Errorf("failed to normalize image name for ImageRef: %v", err)
//}
//if digested, isDigested := r.(reference.Canonical); isDigested {
//imageRef = reference.FamiliarString(digested)
//break
//}
//}
for _ , n := range status . Names {
r , err := reference . ParseNormalizedNamed ( n )
if err != nil {
return nil , fmt . Errorf ( "failed to normalize image name for Image: %v" , err )
}
if tagged , isTagged := r . ( reference . Tagged ) ; isTagged {
imageName = reference . FamiliarString ( tagged )
break
}
}
specgen . AddAnnotation ( annotations . ImageName , imageName )
specgen . AddAnnotation ( annotations . ImageRef , imageRef )
2017-08-29 23:00:49 +00:00
specgen . AddAnnotation ( annotations . IP , sb . IP ( ) )
2017-08-14 19:29:53 +00:00
2016-12-08 23:32:17 +00:00
// bind mount the pod shm
2017-07-19 19:03:22 +00:00
specgen . AddBindMount ( sb . ShmPath ( ) , "/dev/shm" , [ ] string { "rw" } )
2016-12-08 23:32:17 +00:00
2017-06-14 13:28:13 +00:00
options := [ ] string { "rw" }
if readOnlyRootfs {
options = [ ] string { "ro" }
}
2017-07-19 19:03:22 +00:00
if sb . ResolvPath ( ) != "" {
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( sb . ResolvPath ( ) , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2017-09-13 08:28:41 +00:00
2017-03-24 14:32:16 +00:00
// bind mount the pod resolver file
2017-07-19 19:03:22 +00:00
specgen . AddBindMount ( sb . ResolvPath ( ) , "/etc/resolv.conf" , options )
2017-03-24 14:32:16 +00:00
}
2017-09-13 08:28:41 +00:00
if sb . HostnamePath ( ) != "" {
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( sb . HostnamePath ( ) , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2017-09-13 08:28:41 +00:00
specgen . AddBindMount ( sb . HostnamePath ( ) , "/etc/hostname" , options )
}
2017-04-22 00:54:29 +00:00
// Bind mount /etc/hosts for host networking containers
if hostNetwork ( containerConfig ) {
2017-06-14 13:28:13 +00:00
specgen . AddBindMount ( "/etc/hosts" , "/etc/hosts" , options )
2017-04-22 00:54:29 +00:00
}
2017-11-02 17:20:45 +00:00
// Set hostname and add env for hostname
2017-09-13 08:28:41 +00:00
specgen . SetHostname ( sb . Hostname ( ) )
2017-11-02 17:20:45 +00:00
specgen . AddProcessEnv ( "HOSTNAME" , sb . Hostname ( ) )
2017-03-29 23:11:57 +00:00
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Name , containerName )
2017-06-23 16:31:13 +00:00
specgen . AddAnnotation ( annotations . ContainerID , containerID )
2017-07-19 19:03:22 +00:00
specgen . AddAnnotation ( annotations . SandboxID , sb . ID ( ) )
specgen . AddAnnotation ( annotations . SandboxName , sb . InfraContainer ( ) . Name ( ) )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . ContainerType , annotations . ContainerTypeContainer )
specgen . AddAnnotation ( annotations . LogPath , logPath )
specgen . AddAnnotation ( annotations . TTY , fmt . Sprintf ( "%v" , containerConfig . Tty ) )
2017-06-08 20:08:29 +00:00
specgen . AddAnnotation ( annotations . Stdin , fmt . Sprintf ( "%v" , containerConfig . Stdin ) )
specgen . AddAnnotation ( annotations . StdinOnce , fmt . Sprintf ( "%v" , containerConfig . StdinOnce ) )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Image , image )
2017-09-22 23:44:02 +00:00
specgen . AddAnnotation ( annotations . ResolvPath , sb . InfraContainer ( ) . CrioAnnotations ( ) [ annotations . ResolvPath ] )
2016-11-22 16:49:54 +00:00
2017-05-11 09:22:47 +00:00
created := time . Now ( )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Created , created . Format ( time . RFC3339Nano ) )
2017-05-11 09:22:47 +00:00
2016-11-22 16:49:54 +00:00
metadataJSON , err := json . Marshal ( metadata )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Metadata , string ( metadataJSON ) )
2016-11-22 16:49:54 +00:00
labelsJSON , err := json . Marshal ( labels )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Labels , string ( labelsJSON ) )
2016-11-22 16:49:54 +00:00
2017-06-01 16:40:33 +00:00
kubeAnnotationsJSON , err := json . Marshal ( kubeAnnotations )
2016-12-12 17:55:34 +00:00
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Annotations , string ( kubeAnnotationsJSON ) )
2016-12-12 17:55:34 +00:00
2017-09-27 18:46:31 +00:00
spp := containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetSeccompProfilePath ( )
2017-09-06 11:25:19 +00:00
if ! privileged {
2017-09-27 18:46:31 +00:00
if err = s . setupSeccomp ( & specgen , spp ) ; err != nil {
2017-09-06 11:25:19 +00:00
return nil , err
}
2016-11-23 09:41:48 +00:00
}
2017-09-27 18:46:31 +00:00
specgen . AddAnnotation ( annotations . SeccompProfilePath , spp )
// TODO(runcom): add spp to container...
2016-11-23 09:41:48 +00:00
2017-09-27 18:46:31 +00:00
metaname := metadata . Name
2017-02-03 14:41:28 +00:00
attempt := metadata . Attempt
2017-07-31 18:38:45 +00:00
containerInfo , err := s . StorageRuntimeServer ( ) . CreateContainer ( s . ImageContext ( ) ,
2017-07-19 19:03:22 +00:00
sb . Name ( ) , sb . ID ( ) ,
2017-01-31 15:32:27 +00:00
image , image ,
2016-10-18 14:48:33 +00:00
containerName , containerID ,
metaname ,
attempt ,
2017-09-13 19:06:54 +00:00
mountLabel ,
2016-10-18 14:48:33 +00:00
nil )
if err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-07-31 18:38:45 +00:00
mountPoint , err := s . StorageRuntimeServer ( ) . StartContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to mount container %s(%s): %v" , containerName , containerID , err )
}
2017-08-31 13:16:25 +00:00
specgen . AddAnnotation ( annotations . MountPoint , mountPoint )
2016-10-18 14:48:33 +00:00
2017-04-04 22:39:59 +00:00
containerImageConfig := containerInfo . Config
2017-06-07 16:38:04 +00:00
if containerImageConfig == nil {
return nil , fmt . Errorf ( "empty image config for %s" , image )
}
2017-04-04 22:39:59 +00:00
2017-05-26 16:31:28 +00:00
if containerImageConfig . Config . StopSignal != "" {
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
specgen . AddAnnotation ( "org.opencontainers.image.stopSignal" , containerImageConfig . Config . StopSignal )
}
2017-07-14 22:32:25 +00:00
// Add image volumes
2017-10-28 21:43:20 +00:00
volumeMounts , err := addImageVolumes ( mountPoint , s , & containerInfo , & specgen , mountLabel )
if err != nil {
2017-07-14 22:32:25 +00:00
return nil , err
2017-05-22 15:19:49 +00:00
}
2017-04-04 22:39:59 +00:00
processArgs , err := buildOCIProcessArgs ( containerConfig , containerImageConfig )
2017-03-16 09:59:41 +00:00
if err != nil {
return nil , err
2016-10-18 14:48:33 +00:00
}
specgen . SetProcessArgs ( processArgs )
2017-03-27 22:53:47 +00:00
// Add environment variables from CRI and image config
envs := containerConfig . GetEnvs ( )
if envs != nil {
for _ , item := range envs {
key := item . Key
value := item . Value
if key == "" {
continue
}
specgen . AddProcessEnv ( key , value )
}
}
2017-04-04 22:39:59 +00:00
if containerImageConfig != nil {
for _ , item := range containerImageConfig . Config . Env {
parts := strings . SplitN ( item , "=" , 2 )
if len ( parts ) != 2 {
return nil , fmt . Errorf ( "invalid env from image: %s" , item )
}
2017-03-27 22:53:47 +00:00
2017-04-04 22:39:59 +00:00
if parts [ 0 ] == "" {
continue
}
specgen . AddProcessEnv ( parts [ 0 ] , parts [ 1 ] )
2017-03-27 22:53:47 +00:00
}
}
// Set working directory
// Pick it up from image config first and override if specified in CRI
2017-03-31 21:04:16 +00:00
containerCwd := "/"
2017-04-04 22:39:59 +00:00
if containerImageConfig != nil {
imageCwd := containerImageConfig . Config . WorkingDir
if imageCwd != "" {
containerCwd = imageCwd
}
2017-03-31 21:04:16 +00:00
}
runtimeCwd := containerConfig . WorkingDir
if runtimeCwd != "" {
containerCwd = runtimeCwd
2017-03-27 22:53:47 +00:00
}
2017-03-31 21:04:16 +00:00
specgen . SetProcessCwd ( containerCwd )
2017-11-03 17:59:52 +00:00
if err := setupWorkingDirectory ( mountPoint , mountLabel , containerCwd ) ; err != nil {
if err1 := s . StorageRuntimeServer ( ) . StopContainer ( containerID ) ; err1 != nil {
return nil , fmt . Errorf ( "can't umount container after cwd error %v: %v" , err , err1 )
}
return nil , err
}
2017-03-27 22:53:47 +00:00
2017-10-28 21:43:20 +00:00
var secretMounts [ ] rspec . Mount
if len ( s . config . DefaultMounts ) > 0 {
var err error
secretMounts , err = addSecretsBindMounts ( mountLabel , containerInfo . RunDir , s . config . DefaultMounts , specgen )
if err != nil {
return nil , fmt . Errorf ( "failed to mount secrets: %v" , err )
}
}
mounts := [ ] rspec . Mount { }
mounts = append ( mounts , ociMounts ... )
mounts = append ( mounts , volumeMounts ... )
mounts = append ( mounts , secretMounts ... )
sort . Sort ( orderedMounts ( mounts ) )
for _ , m := range mounts {
specgen . AddBindMount ( m . Source , m . Destination , m . Options )
}
2017-08-12 10:29:22 +00:00
if err := s . setupOCIHooks ( & specgen , sb , containerConfig , processArgs [ 0 ] ) ; err != nil {
return nil , err
}
2017-03-29 18:23:33 +00:00
// Setup user and groups
if linux != nil {
2017-04-04 22:39:59 +00:00
if err = setupContainerUser ( & specgen , mountPoint , linux . GetSecurityContext ( ) , containerImageConfig ) ; err != nil {
2017-03-29 18:23:33 +00:00
return nil , err
}
}
2017-07-07 21:43:35 +00:00
// Set up pids limit if pids cgroup is mounted
_ , err = cgroups . FindCgroupMountpoint ( "pids" )
if err == nil {
specgen . SetLinuxResourcesPidsLimit ( s . config . PidsLimit )
}
2016-10-18 14:48:33 +00:00
// by default, the root path is an empty string. set it now.
specgen . SetRootPath ( mountPoint )
saveOptions := generate . ExportOptions { }
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . Dir , "config.json" ) , saveOptions ) ; err != nil {
return nil , err
}
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . RunDir , "config.json" ) , saveOptions ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-09-22 23:44:02 +00:00
crioAnnotations := specgen . Spec ( ) . Annotations
container , err := oci . NewContainer ( containerID , containerName , containerInfo . RunDir , logPath , sb . NetNs ( ) , labels , crioAnnotations , kubeAnnotations , image , imageName , imageRef , metadata , sb . ID ( ) , containerConfig . Tty , containerConfig . Stdin , containerConfig . StdinOnce , sb . Privileged ( ) , sb . Trusted ( ) , containerInfo . Dir , created , containerImageConfig . Config . StopSignal )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2017-10-19 13:02:56 +00:00
container . SetSpec ( specgen . Spec ( ) )
2017-08-31 13:16:25 +00:00
container . SetMountPoint ( mountPoint )
2016-11-22 16:49:54 +00:00
2017-08-14 19:52:25 +00:00
for _ , cv := range containerVolumes {
container . AddVolume ( cv )
}
2016-11-22 16:49:54 +00:00
return container , nil
}
2017-09-27 18:46:31 +00:00
func ( s * Server ) setupSeccomp ( specgen * generate . Generator , profile string ) error {
if profile == "" {
// running w/o seccomp, aka unconfined
specgen . Spec ( ) . Linux . Seccomp = nil
return nil
2016-11-23 09:41:48 +00:00
}
if ! s . seccompEnabled {
if profile != seccompUnconfined {
return fmt . Errorf ( "seccomp is not enabled in your kernel, cannot run with a profile" )
}
logrus . Warn ( "seccomp is not enabled in your kernel, running container without profile" )
}
if profile == seccompUnconfined {
// running w/o seccomp, aka unconfined
specgen . Spec ( ) . Linux . Seccomp = nil
return nil
}
2017-09-18 10:50:39 +00:00
if profile == seccompRuntimeDefault || profile == seccompDockerDefault {
2016-11-23 09:41:48 +00:00
return seccomp . LoadProfileFromStruct ( s . seccompProfile , specgen )
}
if ! strings . HasPrefix ( profile , seccompLocalhostPrefix ) {
return fmt . Errorf ( "unknown seccomp profile option: %q" , profile )
}
2017-09-27 18:46:31 +00:00
fname := strings . TrimPrefix ( profile , "localhost/" )
file , err := ioutil . ReadFile ( filepath . FromSlash ( fname ) )
if err != nil {
return fmt . Errorf ( "cannot load seccomp profile %q: %v" , fname , err )
}
return seccomp . LoadProfileFromBytes ( file , specgen )
2016-11-23 09:41:48 +00:00
}
2016-11-30 08:19:36 +00:00
// getAppArmorProfileName gets the profile name for the given container.
func ( s * Server ) getAppArmorProfileName ( annotations map [ string ] string , ctrName string ) string {
profile := apparmor . GetProfileNameFromPodAnnotations ( annotations , ctrName )
if profile == "" {
return ""
}
if profile == apparmor . ProfileRuntimeDefault {
// If the value is runtime/default, then return default profile.
return s . appArmorProfile
}
2016-12-02 07:13:41 +00:00
return strings . TrimPrefix ( profile , apparmor . ProfileNamePrefix )
2016-11-30 08:19:36 +00:00
}
2017-03-29 18:16:53 +00:00
// openContainerFile opens a file inside a container rootfs safely.
//
// path is joined onto rootfs and resolved with symlink.FollowSymlinkInScope
// using rootfs as the scope before the file is opened.
//
// On any failure the returned io.ReadCloser is a true nil interface value, so
// callers may compare it against nil or pass it on to code that does so.
func openContainerFile(rootfs string, path string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, path), rootfs)
	if err != nil {
		return nil, err
	}

	f, err := os.Open(fp)
	if err != nil {
		// Do not return the nil *os.File directly: wrapped in io.ReadCloser it
		// would become a non-nil interface holding a nil pointer, and any
		// "reader != nil" check downstream would wrongly succeed.
		return nil, err
	}
	return f, nil
}
2017-03-29 18:18:35 +00:00
// getUserInfo returns UID, GID and additional groups for specified user
// by looking them up in /etc/passwd and /etc/group
func getUserInfo ( rootfs string , userName string ) ( uint32 , uint32 , [ ] uint32 , error ) {
// We don't care if we can't open the file because
// not all images will have these files
passwdFile , err := openContainerFile ( rootfs , "/etc/passwd" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/passwd: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer passwdFile . Close ( )
}
2017-03-29 18:23:33 +00:00
2017-03-29 18:18:35 +00:00
groupFile , err := openContainerFile ( rootfs , "/etc/group" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/group: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer groupFile . Close ( )
}
execUser , err := user . GetExecUser ( userName , nil , passwdFile , groupFile )
if err != nil {
return 0 , 0 , nil , err
}
uid := uint32 ( execUser . Uid )
gid := uint32 ( execUser . Gid )
var additionalGids [ ] uint32
for _ , g := range execUser . Sgids {
additionalGids = append ( additionalGids , uint32 ( g ) )
}
return uid , gid , additionalGids , nil
}
2017-09-20 13:19:58 +00:00
// setOCIBindMountsPrivileged relaxes the mount restrictions in the spec held
// by g: the "ro" option is removed from the "/sys" mount (unless the root
// filesystem itself is read-only) and from every mount of type "cgroup", and
// the Linux read-only and masked path lists are cleared.
func setOCIBindMountsPrivileged(g *generate.Generator) {
	spec := g.Spec()

	// clear readonly for /sys and cgroup
	for idx := range spec.Mounts {
		mnt := &spec.Mounts[idx]
		if mnt.Destination == "/sys" && !spec.Root.Readonly {
			clearReadOnly(mnt)
		}
		if mnt.Type == "cgroup" {
			clearReadOnly(mnt)
		}
	}

	spec.Linux.MaskedPaths = nil
	spec.Linux.ReadonlyPaths = nil
}
// clearReadOnly replaces m.Options with a new slice containing every option
// except "ro", in the original order. The result is nil when nothing remains.
func clearReadOnly(m *rspec.Mount) {
	var kept []string
	for i := range m.Options {
		if m.Options[i] == "ro" {
			continue
		}
		kept = append(kept, m.Options[i])
	}
	m.Options = kept
}
2017-11-03 17:59:52 +00:00
// setupWorkingDirectory makes sure containerCwd exists inside rootfs.
//
// The path is resolved with symlink.FollowSymlinkInScope using rootfs as the
// scope, created with mode 0755 (including missing parents), and, when
// mountLabel is non-empty, relabeled with it. A relabel error equal to
// unix.ENOTSUP is ignored; any other relabel error is returned.
func setupWorkingDirectory(rootfs, mountLabel, containerCwd string) error {
	joined := filepath.Join(rootfs, containerCwd)
	cwdPath, err := symlink.FollowSymlinkInScope(joined, rootfs)
	if err != nil {
		return err
	}

	if err = os.MkdirAll(cwdPath, 0755); err != nil {
		return err
	}

	// Nothing to relabel without a mount label.
	if mountLabel == "" {
		return nil
	}

	relabelErr := label.Relabel(cwdPath, mountLabel, true)
	if relabelErr == nil || relabelErr == unix.ENOTSUP {
		return nil
	}
	return fmt.Errorf("relabel failed %s: %v", cwdPath, relabelErr)
}