2016-11-22 22:23:01 +00:00
package server
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
2016-12-08 23:32:17 +00:00
"strconv"
2017-05-18 00:45:57 +00:00
"strings"
2016-12-08 23:32:17 +00:00
"syscall"
2017-05-11 08:43:50 +00:00
"time"
2016-11-22 22:23:01 +00:00
"github.com/Sirupsen/logrus"
2017-05-17 17:18:35 +00:00
"github.com/containers/storage"
2016-11-22 22:23:01 +00:00
"github.com/kubernetes-incubator/cri-o/oci"
2017-05-18 00:45:57 +00:00
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
2016-11-22 22:23:01 +00:00
"github.com/opencontainers/runtime-tools/generate"
2017-03-22 17:58:35 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2016-11-22 22:23:01 +00:00
"golang.org/x/net/context"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
2017-02-21 17:19:06 +00:00
// privilegedSandbox reports whether the sandbox request asks for additional
// host privileges. That is the case when the Linux security context is
// marked privileged, or when its namespace options request the host's
// network, PID or IPC namespace. A request without a security context (or
// without namespace options) is never considered privileged.
func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
	secCtx := req.GetConfig().GetLinux().GetSecurityContext()
	switch {
	case secCtx == nil:
		return false
	case secCtx.Privileged:
		return true
	}

	nsOpts := secCtx.GetNamespaceOptions()
	return nsOpts != nil && (nsOpts.HostNetwork || nsOpts.HostPid || nsOpts.HostIpc)
}
2017-03-06 23:08:46 +00:00
func ( s * Server ) runContainer ( container * oci . Container , cgroupParent string ) error {
if err := s . runtime . CreateContainer ( container , cgroupParent ) ; err != nil {
2016-12-06 12:17:52 +00:00
return err
}
if err := s . runtime . UpdateStatus ( container ) ; err != nil {
return err
}
if err := s . runtime . StartContainer ( container ) ; err != nil {
return err
}
if err := s . runtime . UpdateStatus ( container ) ; err != nil {
return err
}
return nil
}
2016-11-22 22:23:01 +00:00
// RunPodSandbox creates and runs a pod-level sandbox.
2016-11-23 17:16:21 +00:00
func ( s * Server ) RunPodSandbox ( ctx context . Context , req * pb . RunPodSandboxRequest ) ( resp * pb . RunPodSandboxResponse , err error ) {
2017-04-04 15:24:55 +00:00
s . updateLock . RLock ( )
defer s . updateLock . RUnlock ( )
2016-11-22 22:23:01 +00:00
logrus . Debugf ( "RunPodSandboxRequest %+v" , req )
2017-03-24 14:28:14 +00:00
var processLabel , mountLabel , netNsPath , resolvPath string
2016-11-22 22:23:01 +00:00
// process req.Name
2017-05-04 16:41:15 +00:00
kubeName := req . GetConfig ( ) . GetMetadata ( ) . Name
if kubeName == "" {
2016-11-22 22:23:01 +00:00
return nil , fmt . Errorf ( "PodSandboxConfig.Name should not be empty" )
}
2017-02-03 14:41:28 +00:00
namespace := req . GetConfig ( ) . GetMetadata ( ) . Namespace
attempt := req . GetConfig ( ) . GetMetadata ( ) . Attempt
2016-11-22 22:23:01 +00:00
2017-05-04 16:41:15 +00:00
id , name , err := s . generatePodIDandName ( kubeName , namespace , attempt )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
2016-10-18 14:48:33 +00:00
_ , containerName , err := s . generateContainerIDandName ( name , "infra" , attempt )
if err != nil {
return nil , err
}
2016-11-22 22:23:01 +00:00
defer func ( ) {
if err != nil {
s . releasePodName ( name )
}
} ( )
2017-04-19 19:17:10 +00:00
podContainer , err := s . storageRuntimeServer . CreatePodSandbox ( s . imageContext ,
2016-10-18 14:48:33 +00:00
name , id ,
s . config . PauseImage , "" ,
containerName ,
2017-02-03 14:41:28 +00:00
req . GetConfig ( ) . GetMetadata ( ) . Name ,
req . GetConfig ( ) . GetMetadata ( ) . Uid ,
2016-10-18 14:48:33 +00:00
namespace ,
attempt ,
nil )
if err == storage . ErrDuplicateName {
return nil , fmt . Errorf ( "pod sandbox with name %q already exists" , name )
}
if err != nil {
return nil , fmt . Errorf ( "error creating pod sandbox with name %q: %v" , name , err )
2016-11-22 22:23:01 +00:00
}
defer func ( ) {
if err != nil {
2017-04-19 19:17:10 +00:00
if err2 := s . storageRuntimeServer . RemovePodSandbox ( id ) ; err2 != nil {
2016-10-18 14:48:33 +00:00
logrus . Warnf ( "couldn't cleanup pod sandbox %q: %v" , id , err2 )
2016-11-22 22:23:01 +00:00
}
}
} ( )
2016-10-18 14:48:33 +00:00
// TODO: factor generating/updating the spec into something other projects can vendor
2016-11-22 22:23:01 +00:00
// creates a spec Generator with the default spec.
g := generate . New ( )
// setup defaults for the pod sandbox
g . SetRootReadonly ( true )
2016-10-18 14:48:33 +00:00
if s . config . PauseCommand == "" {
if podContainer . Config != nil {
g . SetProcessArgs ( podContainer . Config . Config . Cmd )
} else {
g . SetProcessArgs ( [ ] string { podInfraCommand } )
}
} else {
g . SetProcessArgs ( [ ] string { s . config . PauseCommand } )
}
2016-11-22 22:23:01 +00:00
// set hostname
2017-02-03 14:41:28 +00:00
hostname := req . GetConfig ( ) . Hostname
2016-11-22 22:23:01 +00:00
if hostname != "" {
g . SetHostname ( hostname )
}
// set DNS options
2017-02-03 14:41:28 +00:00
if req . GetConfig ( ) . GetDnsConfig ( ) != nil {
dnsServers := req . GetConfig ( ) . GetDnsConfig ( ) . Servers
dnsSearches := req . GetConfig ( ) . GetDnsConfig ( ) . Searches
dnsOptions := req . GetConfig ( ) . GetDnsConfig ( ) . Options
2017-03-24 14:28:14 +00:00
resolvPath = fmt . Sprintf ( "%s/resolv.conf" , podContainer . RunDir )
2017-02-03 14:41:28 +00:00
err = parseDNSOptions ( dnsServers , dnsSearches , dnsOptions , resolvPath )
if err != nil {
err1 := removeFile ( resolvPath )
if err1 != nil {
err = err1
return nil , fmt . Errorf ( "%v; failed to remove %s: %v" , err , resolvPath , err1 )
}
return nil , err
2016-11-22 22:23:01 +00:00
}
2017-02-03 14:41:28 +00:00
g . AddBindMount ( resolvPath , "/etc/resolv.conf" , [ ] string { "ro" } )
2016-11-22 22:23:01 +00:00
}
// add metadata
metadata := req . GetConfig ( ) . GetMetadata ( )
metadataJSON , err := json . Marshal ( metadata )
if err != nil {
return nil , err
}
// add labels
labels := req . GetConfig ( ) . GetLabels ( )
labelsJSON , err := json . Marshal ( labels )
if err != nil {
return nil , err
}
// add annotations
annotations := req . GetConfig ( ) . GetAnnotations ( )
annotationsJSON , err := json . Marshal ( annotations )
if err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
// set log directory
logDir := req . GetConfig ( ) . LogDirectory
if logDir == "" {
logDir = filepath . Join ( s . config . LogDir , id )
}
if err = os . MkdirAll ( logDir , 0700 ) ; err != nil {
return nil , err
}
// This should always be absolute from k8s.
if ! filepath . IsAbs ( logDir ) {
return nil , fmt . Errorf ( "requested logDir for sbox id %s is a relative path: %s" , id , logDir )
}
2016-11-22 22:23:01 +00:00
// Don't use SELinux separation with Host Pid or IPC Namespace,
2017-02-03 14:41:28 +00:00
if ! req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostPid && ! req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostIpc {
2016-11-22 22:23:01 +00:00
processLabel , mountLabel , err = getSELinuxLabels ( nil )
if err != nil {
return nil , err
}
g . SetProcessSelinuxLabel ( processLabel )
2017-03-15 18:57:05 +00:00
g . SetLinuxMountLabel ( mountLabel )
2016-11-22 22:23:01 +00:00
}
2016-12-08 23:32:17 +00:00
// create shm mount for the pod containers.
var shmPath string
2017-02-03 14:41:28 +00:00
if req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostIpc {
2016-12-08 23:32:17 +00:00
shmPath = "/dev/shm"
} else {
2016-10-18 14:48:33 +00:00
shmPath , err = setupShm ( podContainer . RunDir , mountLabel )
2016-12-08 23:32:17 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
if err2 := syscall . Unmount ( shmPath , syscall . MNT_DETACH ) ; err2 != nil {
logrus . Warnf ( "failed to unmount shm for pod: %v" , err2 )
}
}
} ( )
}
2016-10-18 14:48:33 +00:00
err = s . setPodSandboxMountLabel ( id , mountLabel )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
s . releaseContainerName ( containerName )
}
} ( )
2016-10-18 14:48:33 +00:00
if err = s . ctrIDIndex . Add ( id ) ; err != nil {
2016-11-22 22:23:01 +00:00
return nil , err
}
defer func ( ) {
if err != nil {
2016-10-18 14:48:33 +00:00
if err2 := s . ctrIDIndex . Delete ( id ) ; err2 != nil {
logrus . Warnf ( "couldn't delete ctr id %s from idIndex" , id )
2016-11-22 22:23:01 +00:00
}
}
} ( )
2016-10-07 15:59:39 +00:00
// set log path inside log directory
logPath := filepath . Join ( logDir , id + ".log" )
2017-02-21 17:19:06 +00:00
2017-04-04 14:11:53 +00:00
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath ( logPath ) ; err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
privileged := s . privilegedSandbox ( req )
2017-05-12 13:36:15 +00:00
g . AddAnnotation ( "crio/metadata" , string ( metadataJSON ) )
g . AddAnnotation ( "crio/labels" , string ( labelsJSON ) )
g . AddAnnotation ( "crio/annotations" , string ( annotationsJSON ) )
g . AddAnnotation ( "crio/log_path" , logPath )
g . AddAnnotation ( "crio/name" , name )
g . AddAnnotation ( "crio/container_type" , containerTypeSandbox )
g . AddAnnotation ( "crio/sandbox_id" , id )
g . AddAnnotation ( "crio/container_name" , containerName )
g . AddAnnotation ( "crio/container_id" , id )
g . AddAnnotation ( "crio/shm_path" , shmPath )
g . AddAnnotation ( "crio/privileged_runtime" , fmt . Sprintf ( "%v" , privileged ) )
g . AddAnnotation ( "crio/resolv_path" , resolvPath )
g . AddAnnotation ( "crio/hostname" , hostname )
g . AddAnnotation ( "crio/kube_name" , kubeName )
2017-05-26 16:31:28 +00:00
if podContainer . Config . Config . StopSignal != "" {
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
g . AddAnnotation ( "org.opencontainers.image.stopSignal" , podContainer . Config . Config . StopSignal )
}
2016-11-22 22:23:01 +00:00
2017-05-11 08:43:50 +00:00
created := time . Now ( )
2017-05-11 09:04:43 +00:00
g . AddAnnotation ( "crio/created" , created . Format ( time . RFC3339Nano ) )
2017-05-11 08:43:50 +00:00
2016-11-22 22:23:01 +00:00
sb := & sandbox {
id : id ,
2017-05-04 16:41:15 +00:00
namespace : namespace ,
2016-11-22 22:23:01 +00:00
name : name ,
2017-05-04 16:41:15 +00:00
kubeName : kubeName ,
2016-11-22 22:23:01 +00:00
logDir : logDir ,
labels : labels ,
annotations : annotations ,
containers : oci . NewMemoryStore ( ) ,
processLabel : processLabel ,
mountLabel : mountLabel ,
metadata : metadata ,
2016-12-08 23:32:17 +00:00
shmPath : shmPath ,
2017-02-21 17:19:06 +00:00
privileged : privileged ,
2017-03-24 14:28:14 +00:00
resolvPath : resolvPath ,
2017-03-29 23:11:57 +00:00
hostname : hostname ,
2016-11-22 22:23:01 +00:00
}
2017-04-04 15:22:34 +00:00
defer func ( ) {
if err != nil {
2017-04-06 15:36:26 +00:00
s . removeSandbox ( id )
2017-04-04 15:22:34 +00:00
if err2 := s . podIDIndex . Delete ( id ) ; err2 != nil {
logrus . Warnf ( "couldn't delete pod id %s from idIndex" , id )
}
}
} ( )
2016-11-22 22:23:01 +00:00
2017-04-06 15:36:26 +00:00
s . addSandbox ( sb )
if err = s . podIDIndex . Add ( id ) ; err != nil {
return nil , err
}
2016-11-22 22:23:01 +00:00
for k , v := range annotations {
g . AddAnnotation ( k , v )
}
2016-11-19 02:16:50 +00:00
// extract linux sysctls from annotations and pass down to oci runtime
safe , unsafe , err := SysctlsFromPodAnnotations ( annotations )
if err != nil {
return nil , err
}
for _ , sysctl := range safe {
g . AddLinuxSysctl ( sysctl . Name , sysctl . Value )
}
for _ , sysctl := range unsafe {
g . AddLinuxSysctl ( sysctl . Name , sysctl . Value )
}
2016-11-22 22:23:01 +00:00
// setup cgroup settings
2017-02-03 14:41:28 +00:00
cgroupParent := req . GetConfig ( ) . GetLinux ( ) . CgroupParent
2016-11-22 22:23:01 +00:00
if cgroupParent != "" {
2016-12-19 23:06:27 +00:00
if s . config . CgroupManager == "systemd" {
2017-05-18 00:46:53 +00:00
cgPath , err := convertCgroupNameToSystemd ( cgroupParent , false )
if err != nil {
return nil , err
}
g . SetLinuxCgroupsPath ( cgPath + ":" + "crio" + ":" + id )
sb . cgroupParent = cgPath
2016-12-19 23:06:27 +00:00
} else {
2017-03-04 00:38:46 +00:00
g . SetLinuxCgroupsPath ( cgroupParent + "/" + id )
2017-05-18 00:46:53 +00:00
sb . cgroupParent = cgroupParent
2016-12-19 23:06:27 +00:00
}
2016-11-22 22:23:01 +00:00
}
2017-02-03 14:41:28 +00:00
hostNetwork := req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostNetwork
2017-01-16 15:53:29 +00:00
2016-11-22 22:23:01 +00:00
// set up namespaces
2017-01-16 15:53:29 +00:00
if hostNetwork {
2016-11-22 22:23:01 +00:00
err = g . RemoveLinuxNamespace ( "network" )
if err != nil {
return nil , err
}
2016-11-23 17:16:21 +00:00
netNsPath , err = hostNetNsPath ( )
if err != nil {
return nil , err
}
} else {
// Create the sandbox network namespace
if err = sb . netNsCreate ( ) ; err != nil {
return nil , err
}
defer func ( ) {
if err == nil {
return
}
if netnsErr := sb . netNsRemove ( ) ; netnsErr != nil {
logrus . Warnf ( "Failed to remove networking namespace: %v" , netnsErr )
}
2016-12-13 08:34:55 +00:00
} ( )
2016-11-23 17:16:21 +00:00
// Pass the created namespace path to the runtime
err = g . AddOrReplaceLinuxNamespace ( "network" , sb . netNsPath ( ) )
if err != nil {
return nil , err
}
netNsPath = sb . netNsPath ( )
2016-11-22 22:23:01 +00:00
}
2017-02-03 14:41:28 +00:00
if req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostPid {
2016-11-22 22:23:01 +00:00
err = g . RemoveLinuxNamespace ( "pid" )
if err != nil {
return nil , err
}
}
2017-02-03 14:41:28 +00:00
if req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . HostIpc {
2016-11-22 22:23:01 +00:00
err = g . RemoveLinuxNamespace ( "ipc" )
if err != nil {
return nil , err
}
}
2017-02-22 00:21:04 +00:00
if ! s . seccompEnabled {
g . Spec ( ) . Linux . Seccomp = nil
}
2016-10-18 14:48:33 +00:00
saveOptions := generate . ExportOptions { }
2017-04-19 19:17:10 +00:00
mountPoint , err := s . storageRuntimeServer . StartContainer ( id )
2016-11-22 22:23:01 +00:00
if err != nil {
2016-10-18 14:48:33 +00:00
return nil , fmt . Errorf ( "failed to mount container %s in pod sandbox %s(%s): %v" , containerName , sb . name , id , err )
2016-11-22 22:23:01 +00:00
}
2016-10-18 14:48:33 +00:00
g . SetRootPath ( mountPoint )
err = g . SaveToFile ( filepath . Join ( podContainer . Dir , "config.json" ) , saveOptions )
if err != nil {
return nil , fmt . Errorf ( "failed to save template configuration for pod sandbox %s(%s): %v" , sb . name , id , err )
}
if err = g . SaveToFile ( filepath . Join ( podContainer . RunDir , "config.json" ) , saveOptions ) ; err != nil {
return nil , fmt . Errorf ( "failed to write runtime configuration for pod sandbox %s(%s): %v" , sb . name , id , err )
2016-11-22 22:23:01 +00:00
}
2017-05-26 16:31:28 +00:00
container , err := oci . NewContainer ( id , containerName , podContainer . RunDir , logPath , sb . netNs ( ) , labels , annotations , nil , nil , id , false , sb . privileged , podContainer . Dir , created , podContainer . Config . Config . StopSignal )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
sb . infraContainer = container
2016-11-25 15:24:23 +00:00
// setup the network
2017-01-16 15:53:29 +00:00
if ! hostNetwork {
2017-05-04 16:41:15 +00:00
if err = s . netPlugin . SetUpPod ( netNsPath , namespace , kubeName , id ) ; err != nil {
2017-01-16 15:53:29 +00:00
return nil , fmt . Errorf ( "failed to create network for container %s in sandbox %s: %v" , containerName , id , err )
}
2016-11-25 15:24:23 +00:00
}
2017-03-06 23:08:46 +00:00
if err = s . runContainer ( container , sb . cgroupParent ) ; err != nil {
2016-11-22 22:23:01 +00:00
return nil , err
}
2017-05-11 10:03:59 +00:00
s . containerStateToDisk ( container )
2017-02-03 14:41:28 +00:00
resp = & pb . RunPodSandboxResponse { PodSandboxId : id }
2016-11-22 22:23:01 +00:00
logrus . Debugf ( "RunPodSandboxResponse: %+v" , resp )
return resp , nil
}
2016-10-18 14:48:33 +00:00
func ( s * Server ) setPodSandboxMountLabel ( id , mountLabel string ) error {
2017-04-19 19:17:10 +00:00
storageMetadata , err := s . storageRuntimeServer . GetContainerMetadata ( id )
2016-10-18 14:48:33 +00:00
if err != nil {
return err
}
storageMetadata . SetMountLabel ( mountLabel )
2017-04-19 19:17:10 +00:00
return s . storageRuntimeServer . SetContainerMetadata ( id , storageMetadata )
2016-10-18 14:48:33 +00:00
}
2016-11-22 22:23:01 +00:00
func getSELinuxLabels ( selinuxOptions * pb . SELinuxOption ) ( processLabel string , mountLabel string , err error ) {
processLabel = ""
if selinuxOptions != nil {
2017-02-03 14:41:28 +00:00
user := selinuxOptions . User
2016-11-22 22:23:01 +00:00
if user == "" {
return "" , "" , fmt . Errorf ( "SELinuxOption.User is empty" )
}
2017-02-03 14:41:28 +00:00
role := selinuxOptions . Role
2016-11-22 22:23:01 +00:00
if role == "" {
return "" , "" , fmt . Errorf ( "SELinuxOption.Role is empty" )
}
2017-02-03 14:41:28 +00:00
t := selinuxOptions . Type
2016-11-22 22:23:01 +00:00
if t == "" {
return "" , "" , fmt . Errorf ( "SELinuxOption.Type is empty" )
}
2017-02-03 14:41:28 +00:00
level := selinuxOptions . Level
2016-11-22 22:23:01 +00:00
if level == "" {
return "" , "" , fmt . Errorf ( "SELinuxOption.Level is empty" )
}
processLabel = fmt . Sprintf ( "%s:%s:%s:%s" , user , role , t , level )
}
return label . InitLabels ( label . DupSecOpt ( processLabel ) )
}
2016-12-08 23:32:17 +00:00
2016-10-18 14:48:33 +00:00
func setupShm ( podSandboxRunDir , mountLabel string ) ( shmPath string , err error ) {
shmPath = filepath . Join ( podSandboxRunDir , "shm" )
2016-12-08 23:32:17 +00:00
if err = os . Mkdir ( shmPath , 0700 ) ; err != nil {
return "" , err
}
shmOptions := "mode=1777,size=" + strconv . Itoa ( defaultShmSize )
2016-12-09 17:37:47 +00:00
if err = syscall . Mount ( "shm" , shmPath , "tmpfs" , uintptr ( syscall . MS_NOEXEC | syscall . MS_NOSUID | syscall . MS_NODEV ) ,
label . FormatMountLabel ( shmOptions , mountLabel ) ) ; err != nil {
2016-12-08 23:32:17 +00:00
return "" , fmt . Errorf ( "failed to mount shm tmpfs for pod: %v" , err )
}
return shmPath , nil
}
2017-05-18 00:45:57 +00:00
// convertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
// For example, it will return /Burstable.slice/Burstable-pod_123_456.slice in above scenario.
//
// Empty path components are ignored, so leading, trailing and repeated
// slashes do not affect the result. A name with no components at all ("",
// "/", "//", ...) denotes the root and converts to "-.slice".
//
// An error is returned only when outputToCgroupFs is set and the slice name
// cannot be expanded; the error reports the name exactly as the caller
// passed it.
func convertCgroupNameToSystemd(name string, outputToCgroupFs bool) (systemdCgroup string, err error) {
	// systemd treats - as a step in the hierarchy, we convert all - to _
	sanitized := strings.Replace(name, "-", "_", -1)

	// Split on "/" and drop empty components in a single pass.
	parts := strings.FieldsFunc(sanitized, func(r rune) bool { return r == '/' })

	// root converts to -
	result := "-"
	if len(parts) > 0 {
		result = strings.Join(parts, "-")
	}

	// always have a .slice suffix
	result += ".slice"

	// if the caller desired the result in cgroupfs format...
	if outputToCgroupFs {
		result, err = systemd.ExpandSlice(result)
		if err != nil {
			return "", fmt.Errorf("error adapting cgroup name, input: %v, err: %v", name, err)
		}
	}
	return result, nil
}