2016-11-22 22:23:01 +00:00
package server
import (
"encoding/json"
"fmt"
2017-09-13 08:28:41 +00:00
"io/ioutil"
2016-11-22 22:23:01 +00:00
"os"
2017-09-06 00:59:40 +00:00
"path"
2016-11-22 22:23:01 +00:00
"path/filepath"
2017-06-01 09:20:22 +00:00
"regexp"
2016-12-08 23:32:17 +00:00
"strconv"
2017-05-18 00:45:57 +00:00
"strings"
2017-05-11 08:43:50 +00:00
"time"
2016-11-22 22:23:01 +00:00
2017-05-17 17:18:35 +00:00
"github.com/containers/storage"
2017-11-30 15:46:11 +00:00
"github.com/kubernetes-incubator/cri-o/lib/sandbox"
2016-11-22 22:23:01 +00:00
"github.com/kubernetes-incubator/cri-o/oci"
2017-06-01 16:40:33 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
2017-10-19 19:12:55 +00:00
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
2016-11-22 22:23:01 +00:00
"github.com/opencontainers/runtime-tools/generate"
2017-03-22 17:58:35 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2017-08-02 15:17:45 +00:00
"github.com/pkg/errors"
2017-08-05 11:40:46 +00:00
"github.com/sirupsen/logrus"
2016-11-22 22:23:01 +00:00
"golang.org/x/net/context"
2017-06-28 15:47:31 +00:00
"golang.org/x/sys/unix"
2017-09-26 14:23:09 +00:00
"k8s.io/api/core/v1"
2017-08-04 11:13:19 +00:00
pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
2017-09-01 15:49:57 +00:00
"k8s.io/kubernetes/pkg/kubelet/leaky"
2017-06-15 20:56:17 +00:00
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
2017-09-01 15:49:57 +00:00
"k8s.io/kubernetes/pkg/kubelet/types"
2016-11-22 22:23:01 +00:00
)
2017-06-23 15:13:02 +00:00
// Defaults applied to the pod infra (sandbox) container's OCI spec.
const (
	// PodInfraOOMAdj is the OOM score adjustment set on the pod infra
	// container. RunPodSandbox passes it to the spec generator so the
	// infra container is very unlikely to be picked by the OOM killer.
	// TODO: Remove this const once this value is provided over CRI
	// See https://github.com/kubernetes/kubernetes/issues/47938
	PodInfraOOMAdj int = -998

	// PodInfraCPUshares is the default CPU shares value for the sandbox
	// container. It is deliberately left untyped so it converts to
	// whatever integer type the spec generator expects.
	PodInfraCPUshares = 2
)
2017-02-21 17:19:06 +00:00
// privilegedSandbox reports whether the sandbox described by req needs
// additional host privileges: either the security context is flagged
// privileged, or the sandbox shares the host network, PID or IPC namespace.
// A request without a security context or namespace options is unprivileged.
func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
	sc := req.GetConfig().GetLinux().GetSecurityContext()
	switch {
	case sc == nil:
		return false
	case sc.Privileged:
		return true
	}

	nsOpts := sc.GetNamespaceOptions()
	if nsOpts == nil {
		return false
	}
	// Sharing any host namespace counts as privileged.
	return nsOpts.HostNetwork || nsOpts.HostPid || nsOpts.HostIpc
}
2017-06-02 21:15:19 +00:00
// trustedSandbox returns true if the sandbox will run trusted workloads.
func ( s * Server ) trustedSandbox ( req * pb . RunPodSandboxRequest ) bool {
2017-06-08 12:03:24 +00:00
kubeAnnotations := req . GetConfig ( ) . GetAnnotations ( )
trustedAnnotation , ok := kubeAnnotations [ annotations . TrustedSandbox ]
if ! ok {
// A sandbox is trusted by default.
return true
}
return isTrue ( trustedAnnotation )
2017-06-02 21:15:19 +00:00
}
2017-03-06 23:08:46 +00:00
func ( s * Server ) runContainer ( container * oci . Container , cgroupParent string ) error {
2017-07-17 12:25:32 +00:00
if err := s . Runtime ( ) . CreateContainer ( container , cgroupParent ) ; err != nil {
2016-12-06 12:17:52 +00:00
return err
}
2017-10-28 20:14:53 +00:00
return s . Runtime ( ) . StartContainer ( container )
2016-12-06 12:17:52 +00:00
}
2017-06-01 09:20:22 +00:00
var (
// conflictRE matches the "already reserved for pod" name-conflict error text
// and captures the quoted pod ID (lowercase letters and digits) in its only
// group. RunPodSandbox uses the captured ID to stop and remove the pod that
// currently holds the name before retrying the reservation.
// NOTE(review): as shown, the raw string starts and ends with a space inside
// the backticks; confirm that whitespace is intended, since a required
// trailing space would prevent a match when the error text ends at the
// closing quote.
conflictRE = regexp . MustCompile ( ` already reserved for pod "([0-9a-z]+)" ` )
)
2016-11-22 22:23:01 +00:00
// RunPodSandbox creates and runs a pod-level sandbox.
2016-11-23 17:16:21 +00:00
func ( s * Server ) RunPodSandbox ( ctx context . Context , req * pb . RunPodSandboxRequest ) ( resp * pb . RunPodSandboxResponse , err error ) {
2017-11-09 10:10:35 +00:00
const operation = "run_pod_sandbox"
defer func ( ) {
recordOperation ( operation , time . Now ( ) )
recordError ( operation , err )
} ( )
2017-04-04 15:24:55 +00:00
s . updateLock . RLock ( )
defer s . updateLock . RUnlock ( )
2018-01-12 14:45:44 +00:00
if req . GetConfig ( ) . GetMetadata ( ) == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Metadata is nil" )
}
2016-11-22 22:23:01 +00:00
logrus . Debugf ( "RunPodSandboxRequest %+v" , req )
2017-09-11 19:15:33 +00:00
var processLabel , mountLabel , resolvPath string
2016-11-22 22:23:01 +00:00
// process req.Name
2018-01-12 14:45:44 +00:00
kubeName := req . GetConfig ( ) . GetMetadata ( ) . GetName ( )
2017-05-04 16:41:15 +00:00
if kubeName == "" {
2016-11-22 22:23:01 +00:00
return nil , fmt . Errorf ( "PodSandboxConfig.Name should not be empty" )
}
2018-01-12 14:45:44 +00:00
namespace := req . GetConfig ( ) . GetMetadata ( ) . GetNamespace ( )
attempt := req . GetConfig ( ) . GetMetadata ( ) . GetAttempt ( )
2016-11-22 22:23:01 +00:00
2017-06-21 14:53:51 +00:00
id , name , err := s . generatePodIDandName ( req . GetConfig ( ) )
2016-11-22 22:23:01 +00:00
if err != nil {
2017-06-01 09:20:22 +00:00
if strings . Contains ( err . Error ( ) , "already reserved for pod" ) {
matches := conflictRE . FindStringSubmatch ( err . Error ( ) )
if len ( matches ) != 2 {
return nil , err
}
dupID := matches [ 1 ]
2017-06-17 20:55:06 +00:00
if _ , err := s . StopPodSandbox ( ctx , & pb . StopPodSandboxRequest { PodSandboxId : dupID } ) ; err != nil {
return nil , err
}
2017-06-01 09:20:22 +00:00
if _ , err := s . RemovePodSandbox ( ctx , & pb . RemovePodSandboxRequest { PodSandboxId : dupID } ) ; err != nil {
return nil , err
}
2017-06-21 14:53:51 +00:00
id , name , err = s . generatePodIDandName ( req . GetConfig ( ) )
2017-06-01 09:20:22 +00:00
if err != nil {
return nil , err
}
} else {
return nil , err
}
2016-11-22 22:23:01 +00:00
}
2017-06-01 13:34:11 +00:00
defer func ( ) {
if err != nil {
2017-07-25 15:36:33 +00:00
s . ReleasePodName ( name )
2017-06-01 13:34:11 +00:00
}
} ( )
2017-06-21 14:53:51 +00:00
_ , containerName , err := s . generateContainerIDandNameForSandbox ( req . GetConfig ( ) )
2016-10-18 14:48:33 +00:00
if err != nil {
return nil , err
}
2016-11-22 22:23:01 +00:00
defer func ( ) {
if err != nil {
2017-07-20 17:10:16 +00:00
s . ReleaseContainerName ( containerName )
2016-11-22 22:23:01 +00:00
}
} ( )
2017-07-31 18:38:45 +00:00
podContainer , err := s . StorageRuntimeServer ( ) . CreatePodSandbox ( s . ImageContext ( ) ,
2016-10-18 14:48:33 +00:00
name , id ,
s . config . PauseImage , "" ,
containerName ,
2018-01-12 14:45:44 +00:00
req . GetConfig ( ) . GetMetadata ( ) . GetName ( ) ,
req . GetConfig ( ) . GetMetadata ( ) . GetUid ( ) ,
2016-10-18 14:48:33 +00:00
namespace ,
attempt ,
nil )
2017-08-02 15:17:45 +00:00
if errors . Cause ( err ) == storage . ErrDuplicateName {
2016-10-18 14:48:33 +00:00
return nil , fmt . Errorf ( "pod sandbox with name %q already exists" , name )
}
if err != nil {
return nil , fmt . Errorf ( "error creating pod sandbox with name %q: %v" , name , err )
2016-11-22 22:23:01 +00:00
}
defer func ( ) {
if err != nil {
2017-07-31 18:38:45 +00:00
if err2 := s . StorageRuntimeServer ( ) . RemovePodSandbox ( id ) ; err2 != nil {
2016-10-18 14:48:33 +00:00
logrus . Warnf ( "couldn't cleanup pod sandbox %q: %v" , id , err2 )
2016-11-22 22:23:01 +00:00
}
}
} ( )
2016-10-18 14:48:33 +00:00
// TODO: factor generating/updating the spec into something other projects can vendor
2016-11-22 22:23:01 +00:00
// creates a spec Generator with the default spec.
g := generate . New ( )
// setup defaults for the pod sandbox
g . SetRootReadonly ( true )
2016-10-18 14:48:33 +00:00
if s . config . PauseCommand == "" {
if podContainer . Config != nil {
g . SetProcessArgs ( podContainer . Config . Config . Cmd )
} else {
2017-07-19 19:03:22 +00:00
g . SetProcessArgs ( [ ] string { sandbox . PodInfraCommand } )
2016-10-18 14:48:33 +00:00
}
} else {
g . SetProcessArgs ( [ ] string { s . config . PauseCommand } )
}
2016-11-22 22:23:01 +00:00
// set DNS options
2017-02-03 14:41:28 +00:00
if req . GetConfig ( ) . GetDnsConfig ( ) != nil {
dnsServers := req . GetConfig ( ) . GetDnsConfig ( ) . Servers
dnsSearches := req . GetConfig ( ) . GetDnsConfig ( ) . Searches
dnsOptions := req . GetConfig ( ) . GetDnsConfig ( ) . Options
2017-03-24 14:28:14 +00:00
resolvPath = fmt . Sprintf ( "%s/resolv.conf" , podContainer . RunDir )
2017-02-03 14:41:28 +00:00
err = parseDNSOptions ( dnsServers , dnsSearches , dnsOptions , resolvPath )
if err != nil {
err1 := removeFile ( resolvPath )
if err1 != nil {
err = err1
return nil , fmt . Errorf ( "%v; failed to remove %s: %v" , err , resolvPath , err1 )
}
return nil , err
2016-11-22 22:23:01 +00:00
}
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( resolvPath , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2018-01-04 15:53:55 +00:00
mnt := runtimespec . Mount {
Type : "bind" ,
Source : resolvPath ,
Destination : "/etc/resolv.conf" ,
Options : [ ] string { "ro" , "bind" } ,
}
g . AddMount ( mnt )
2016-11-22 22:23:01 +00:00
}
// add metadata
metadata := req . GetConfig ( ) . GetMetadata ( )
metadataJSON , err := json . Marshal ( metadata )
if err != nil {
return nil , err
}
// add labels
labels := req . GetConfig ( ) . GetLabels ( )
2017-09-01 15:49:57 +00:00
2017-11-11 11:00:48 +00:00
if err := validateLabels ( labels ) ; err != nil {
return nil , err
}
2017-09-01 15:49:57 +00:00
// Add special container name label for the infra container
server: fix panic when assigning entry to nil map
When running cri-tests with cri-o, I found out that cri-o panicked
immediately with the following message. Fix it by accessing to the
labels map only if it's non-nil.
```
panic: assignment to entry in nil map
goroutine 57 [running]:
.../cri-o/server.(*Server).RunPodSandbox(0xc42048e000, 0x7efcad4cd400,
0xc42066ec90, 0xc4201703d0, 0x0, 0x0, 0x0)
.../cri-o/server/sandbox_run.go:225 +0xda5
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime
._RuntimeService_RunPodSandbox_Handler(0x21793e0, 0xc42048e000,
0x7efcad4cd400, 0xc42066ec90, 0xc4204fe780, 0x0, 0x0, 0x0, 0x0, 0x0)
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime/api.pb.go:3645 +0x279
.../cri-o/vendor/google.golang.org/grpc.(*Server).processUnaryRPC(0xc420
09e3c0, 0x33e79c0, 0xc4203d1950, 0xc42080a000, 0xc4202bb980, 0x33b1d58,
0xc42066ec60, 0x0, 0x0)
.../cri-o/vendor/google.golang.org/grpc/server.go:638 +0x99c
```
Signed-off-by: Dongsu Park <dongsu@kinvolk.io>
2017-09-29 14:16:38 +00:00
labelsJSON := [ ] byte { }
if labels != nil {
labels [ types . KubernetesContainerNameLabel ] = leaky . PodInfraContainerName
labelsJSON , err = json . Marshal ( labels )
if err != nil {
return nil , err
}
2016-11-22 22:23:01 +00:00
}
// add annotations
2017-06-01 16:40:33 +00:00
kubeAnnotations := req . GetConfig ( ) . GetAnnotations ( )
kubeAnnotationsJSON , err := json . Marshal ( kubeAnnotations )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
// set log directory
2018-02-07 16:15:49 +00:00
logDir := req . GetConfig ( ) . GetLogDirectory ( )
2016-10-07 15:59:39 +00:00
if logDir == "" {
logDir = filepath . Join ( s . config . LogDir , id )
}
if err = os . MkdirAll ( logDir , 0700 ) ; err != nil {
return nil , err
}
// This should always be absolute from k8s.
if ! filepath . IsAbs ( logDir ) {
return nil , fmt . Errorf ( "requested logDir for sbox id %s is a relative path: %s" , id , logDir )
}
2017-09-20 13:19:58 +00:00
privileged := s . privilegedSandbox ( req )
server: handle cases of securityContext, namespaceOptions being nil
Both GetSecurityContext() and GetNamespaceOptions() can return nil.
In these cases, cri-o will panic like this:
```
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x1 pc=0x1cc43f6]
goroutine 66 [running]:
.../cri-o/server.(*Server).RunPodSandbox(0xc42060e300, 0x7f611d37a0b8,
0xc420207e60, 0xc42015e318, 0x0, 0x0, 0x0)
.../cri-o/server/sandbox_run.go:261 +0xfe6
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime._RuntimeService_RunPodSandbox_Handler(0x2180920,
0xc42060e300, 0x7f611d37a0b8, 0xc420207e60, 0xc420505950, 0x0, 0x0, 0x0,
0x64ed0d, 0xc42064bc80)
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime/api.pb.go:3645 +0x279
.../cri-o/vendor/google.golang.org/grpc.(*Server).processUnaryRPC(0xc4200a4240,
0x33f28e0, 0xc4204b0360, 0xc42074a870, 0xc420476de0, 0x33bcd38, 0xc420207e30, 0x0, 0x0)
```
Signed-off-by: Dongsu Park <dongsu@kinvolk.io>
2017-09-29 14:44:43 +00:00
securityContext := req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( )
if securityContext == nil {
2017-10-09 19:53:54 +00:00
logrus . Warn ( "no security context found in config." )
server: handle cases of securityContext, namespaceOptions being nil
Both GetSecurityContext() and GetNamespaceOptions() can return nil.
In these cases, cri-o will panic like this:
```
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x1 pc=0x1cc43f6]
goroutine 66 [running]:
.../cri-o/server.(*Server).RunPodSandbox(0xc42060e300, 0x7f611d37a0b8,
0xc420207e60, 0xc42015e318, 0x0, 0x0, 0x0)
.../cri-o/server/sandbox_run.go:261 +0xfe6
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime._RuntimeService_RunPodSandbox_Handler(0x2180920,
0xc42060e300, 0x7f611d37a0b8, 0xc420207e60, 0xc420505950, 0x0, 0x0, 0x0,
0x64ed0d, 0xc42064bc80)
.../cri-o/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime/api.pb.go:3645 +0x279
.../cri-o/vendor/google.golang.org/grpc.(*Server).processUnaryRPC(0xc4200a4240,
0x33f28e0, 0xc4204b0360, 0xc42074a870, 0xc420476de0, 0x33bcd38, 0xc420207e30, 0x0, 0x0)
```
Signed-off-by: Dongsu Park <dongsu@kinvolk.io>
2017-09-29 14:44:43 +00:00
}
processLabel , mountLabel , err = getSELinuxLabels ( securityContext . GetSelinuxOptions ( ) , privileged )
2017-09-20 13:19:58 +00:00
if err != nil {
return nil , err
}
// Don't use SELinux separation with Host Pid or IPC Namespace or privileged.
2017-10-09 19:53:54 +00:00
if securityContext . GetNamespaceOptions ( ) . GetHostPid ( ) || securityContext . GetNamespaceOptions ( ) . GetHostIpc ( ) {
2017-09-20 13:19:58 +00:00
processLabel , mountLabel = "" , ""
2016-11-22 22:23:01 +00:00
}
2017-09-20 13:19:58 +00:00
g . SetProcessSelinuxLabel ( processLabel )
g . SetLinuxMountLabel ( mountLabel )
2016-11-22 22:23:01 +00:00
2016-12-08 23:32:17 +00:00
// create shm mount for the pod containers.
var shmPath string
2017-10-09 19:53:54 +00:00
if securityContext . GetNamespaceOptions ( ) . GetHostIpc ( ) {
2016-12-08 23:32:17 +00:00
shmPath = "/dev/shm"
} else {
2016-10-18 14:48:33 +00:00
shmPath , err = setupShm ( podContainer . RunDir , mountLabel )
2016-12-08 23:32:17 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
2017-06-28 15:47:31 +00:00
if err2 := unix . Unmount ( shmPath , unix . MNT_DETACH ) ; err2 != nil {
2016-12-08 23:32:17 +00:00
logrus . Warnf ( "failed to unmount shm for pod: %v" , err2 )
}
}
} ( )
}
2016-10-18 14:48:33 +00:00
err = s . setPodSandboxMountLabel ( id , mountLabel )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
2017-07-17 12:25:32 +00:00
if err = s . CtrIDIndex ( ) . Add ( id ) ; err != nil {
2016-11-22 22:23:01 +00:00
return nil , err
}
defer func ( ) {
if err != nil {
2017-07-17 12:25:32 +00:00
if err2 := s . CtrIDIndex ( ) . Delete ( id ) ; err2 != nil {
2016-10-18 14:48:33 +00:00
logrus . Warnf ( "couldn't delete ctr id %s from idIndex" , id )
2016-11-22 22:23:01 +00:00
}
}
} ( )
2016-10-07 15:59:39 +00:00
// set log path inside log directory
logPath := filepath . Join ( logDir , id + ".log" )
2017-02-21 17:19:06 +00:00
2017-04-04 14:11:53 +00:00
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath ( logPath ) ; err != nil {
return nil , err
}
2017-10-09 19:53:54 +00:00
hostNetwork := securityContext . GetNamespaceOptions ( ) . GetHostNetwork ( )
2017-09-13 08:28:41 +00:00
hostname , err := getHostname ( id , req . GetConfig ( ) . Hostname , hostNetwork )
if err != nil {
return nil , err
}
g . SetHostname ( hostname )
2017-06-02 21:15:19 +00:00
trusted := s . trustedSandbox ( req )
2017-06-01 16:40:33 +00:00
g . AddAnnotation ( annotations . Metadata , string ( metadataJSON ) )
g . AddAnnotation ( annotations . Labels , string ( labelsJSON ) )
g . AddAnnotation ( annotations . Annotations , string ( kubeAnnotationsJSON ) )
g . AddAnnotation ( annotations . LogPath , logPath )
g . AddAnnotation ( annotations . Name , name )
2018-02-07 16:15:49 +00:00
g . AddAnnotation ( annotations . Namespace , namespace )
2017-06-01 16:40:33 +00:00
g . AddAnnotation ( annotations . ContainerType , annotations . ContainerTypeSandbox )
g . AddAnnotation ( annotations . SandboxID , id )
g . AddAnnotation ( annotations . ContainerName , containerName )
g . AddAnnotation ( annotations . ContainerID , id )
g . AddAnnotation ( annotations . ShmPath , shmPath )
g . AddAnnotation ( annotations . PrivilegedRuntime , fmt . Sprintf ( "%v" , privileged ) )
2017-06-02 21:15:19 +00:00
g . AddAnnotation ( annotations . TrustedSandbox , fmt . Sprintf ( "%v" , trusted ) )
2017-06-01 16:40:33 +00:00
g . AddAnnotation ( annotations . ResolvPath , resolvPath )
g . AddAnnotation ( annotations . HostName , hostname )
g . AddAnnotation ( annotations . KubeName , kubeName )
2017-05-26 16:31:28 +00:00
if podContainer . Config . Config . StopSignal != "" {
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
g . AddAnnotation ( "org.opencontainers.image.stopSignal" , podContainer . Config . Config . StopSignal )
}
2016-11-22 22:23:01 +00:00
2017-05-11 08:43:50 +00:00
created := time . Now ( )
2017-06-01 16:40:33 +00:00
g . AddAnnotation ( annotations . Created , created . Format ( time . RFC3339Nano ) )
2017-05-11 08:43:50 +00:00
2017-06-15 20:56:17 +00:00
portMappings := convertPortMappings ( req . GetConfig ( ) . GetPortMappings ( ) )
2017-07-19 19:03:22 +00:00
// setup cgroup settings
2017-10-09 19:53:54 +00:00
cgroupParent := req . GetConfig ( ) . GetLinux ( ) . GetCgroupParent ( )
2017-07-19 19:03:22 +00:00
if cgroupParent != "" {
2017-08-29 21:11:30 +00:00
if s . config . CgroupManager == oci . SystemdCgroupsManager {
2017-09-06 01:00:14 +00:00
if len ( cgroupParent ) <= 6 || ! strings . HasSuffix ( path . Base ( cgroupParent ) , ".slice" ) {
return nil , fmt . Errorf ( "cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s" , cgroupParent )
}
cgPath , err := convertCgroupFsNameToSystemd ( cgroupParent )
2017-07-19 19:03:22 +00:00
if err != nil {
return nil , err
}
g . SetLinuxCgroupsPath ( cgPath + ":" + "crio" + ":" + id )
cgroupParent = cgPath
} else {
2017-09-06 01:00:14 +00:00
if strings . HasSuffix ( path . Base ( cgroupParent ) , ".slice" ) {
return nil , fmt . Errorf ( "cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s" , cgroupParent )
}
cgPath := filepath . Join ( cgroupParent , scopePrefix + "-" + id )
g . SetLinuxCgroupsPath ( cgPath )
2017-07-19 19:03:22 +00:00
}
}
2018-02-07 16:15:49 +00:00
g . AddAnnotation ( annotations . CgroupParent , cgroupParent )
2017-07-19 19:03:22 +00:00
sb , err := sandbox . New ( id , namespace , name , kubeName , logDir , labels , kubeAnnotations , processLabel , mountLabel , metadata , shmPath , cgroupParent , privileged , trusted , resolvPath , hostname , portMappings )
if err != nil {
return nil , err
2016-11-22 22:23:01 +00:00
}
2017-06-09 11:57:45 +00:00
s . addSandbox ( sb )
2017-04-04 15:22:34 +00:00
defer func ( ) {
if err != nil {
2017-04-06 15:36:26 +00:00
s . removeSandbox ( id )
2017-04-04 15:22:34 +00:00
}
} ( )
2016-11-22 22:23:01 +00:00
2017-07-25 15:36:33 +00:00
if err = s . PodIDIndex ( ) . Add ( id ) ; err != nil {
2017-04-06 15:36:26 +00:00
return nil , err
}
2017-06-01 13:34:11 +00:00
defer func ( ) {
if err != nil {
2017-07-25 15:36:33 +00:00
if err := s . PodIDIndex ( ) . Delete ( id ) ; err != nil {
2017-06-01 13:34:11 +00:00
logrus . Warnf ( "couldn't delete pod id %s from idIndex" , id )
}
}
} ( )
2017-06-01 16:40:33 +00:00
for k , v := range kubeAnnotations {
2016-11-22 22:23:01 +00:00
g . AddAnnotation ( k , v )
}
2017-09-04 16:11:32 +00:00
for k , v := range labels {
g . AddAnnotation ( k , v )
}
2016-11-22 22:23:01 +00:00
2016-11-19 02:16:50 +00:00
// extract linux sysctls from annotations and pass down to oci runtime
2017-10-19 22:23:41 +00:00
for key , value := range req . GetConfig ( ) . GetLinux ( ) . GetSysctls ( ) {
g . AddLinuxSysctl ( key , value )
2016-11-19 02:16:50 +00:00
}
2017-06-23 15:13:02 +00:00
// Set OOM score adjust of the infra container to be very low
// so it doesn't get killed.
2017-07-20 04:07:01 +00:00
g . SetProcessOOMScoreAdj ( PodInfraOOMAdj )
2017-06-23 15:13:02 +00:00
2017-09-06 15:04:18 +00:00
g . SetLinuxResourcesCPUShares ( PodInfraCPUshares )
2016-11-22 22:23:01 +00:00
// set up namespaces
2017-01-16 15:53:29 +00:00
if hostNetwork {
2017-10-19 19:12:55 +00:00
err = g . RemoveLinuxNamespace ( string ( runtimespec . NetworkNamespace ) )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
2016-11-23 17:16:21 +00:00
} else {
// Create the sandbox network namespace
2017-07-18 20:35:15 +00:00
if err = sb . NetNsCreate ( ) ; err != nil {
2016-11-23 17:16:21 +00:00
return nil , err
}
defer func ( ) {
if err == nil {
return
}
2017-07-18 20:35:15 +00:00
if netnsErr := sb . NetNsRemove ( ) ; netnsErr != nil {
2016-11-23 17:16:21 +00:00
logrus . Warnf ( "Failed to remove networking namespace: %v" , netnsErr )
}
2016-12-13 08:34:55 +00:00
} ( )
2016-11-23 17:16:21 +00:00
// Pass the created namespace path to the runtime
2017-10-19 19:12:55 +00:00
err = g . AddOrReplaceLinuxNamespace ( string ( runtimespec . NetworkNamespace ) , sb . NetNsPath ( ) )
2016-11-23 17:16:21 +00:00
if err != nil {
return nil , err
}
2016-11-22 22:23:01 +00:00
}
2017-10-09 19:53:54 +00:00
if securityContext . GetNamespaceOptions ( ) . GetHostPid ( ) {
2017-10-19 19:12:55 +00:00
err = g . RemoveLinuxNamespace ( string ( runtimespec . PIDNamespace ) )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
}
2017-10-09 19:53:54 +00:00
if securityContext . GetNamespaceOptions ( ) . GetHostIpc ( ) {
2017-10-19 19:12:55 +00:00
err = g . RemoveLinuxNamespace ( string ( runtimespec . IPCNamespace ) )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
}
2017-02-22 00:21:04 +00:00
if ! s . seccompEnabled {
g . Spec ( ) . Linux . Seccomp = nil
}
2016-10-18 14:48:33 +00:00
saveOptions := generate . ExportOptions { }
2017-07-31 18:38:45 +00:00
mountPoint , err := s . StorageRuntimeServer ( ) . StartContainer ( id )
2016-11-22 22:23:01 +00:00
if err != nil {
2017-07-19 19:03:22 +00:00
return nil , fmt . Errorf ( "failed to mount container %s in pod sandbox %s(%s): %v" , containerName , sb . Name ( ) , id , err )
2016-11-22 22:23:01 +00:00
}
2017-08-31 13:16:25 +00:00
g . AddAnnotation ( annotations . MountPoint , mountPoint )
2016-10-18 14:48:33 +00:00
g . SetRootPath ( mountPoint )
2016-11-22 22:23:01 +00:00
2017-09-13 08:28:41 +00:00
hostnamePath := fmt . Sprintf ( "%s/hostname" , podContainer . RunDir )
if err := ioutil . WriteFile ( hostnamePath , [ ] byte ( hostname + "\n" ) , 0644 ) ; err != nil {
return nil , err
}
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( hostnamePath , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2018-01-04 15:53:55 +00:00
mnt := runtimespec . Mount {
Type : "bind" ,
Source : hostnamePath ,
Destination : "/etc/hostname" ,
Options : [ ] string { "ro" , "bind" } ,
}
g . AddMount ( mnt )
2017-09-13 08:28:41 +00:00
g . AddAnnotation ( annotations . HostnamePath , hostnamePath )
sb . AddHostnamePath ( hostnamePath )
2017-09-22 23:44:02 +00:00
container , err := oci . NewContainer ( id , containerName , podContainer . RunDir , logPath , sb . NetNs ( ) , labels , g . Spec ( ) . Annotations , kubeAnnotations , "" , "" , "" , nil , id , false , false , false , sb . Privileged ( ) , sb . Trusted ( ) , podContainer . Dir , created , podContainer . Config . Config . StopSignal )
2016-11-22 22:23:01 +00:00
if err != nil {
return nil , err
}
2017-10-19 13:02:56 +00:00
container . SetSpec ( g . Spec ( ) )
2017-08-31 13:16:25 +00:00
container . SetMountPoint ( mountPoint )
2016-11-22 22:23:01 +00:00
2017-07-19 19:03:22 +00:00
sb . SetInfraContainer ( container )
2016-11-22 22:23:01 +00:00
2017-08-29 23:00:49 +00:00
var ip string
2017-09-11 19:15:33 +00:00
ip , err = s . networkStart ( hostNetwork , sb )
if err != nil {
return nil , err
2016-11-25 15:24:23 +00:00
}
2017-09-11 19:15:33 +00:00
defer func ( ) {
if err != nil {
s . networkStop ( hostNetwork , sb )
}
} ( )
2016-11-25 15:24:23 +00:00
2017-08-29 23:00:49 +00:00
g . AddAnnotation ( annotations . IP , ip )
sb . AddIP ( ip )
2017-09-27 18:46:31 +00:00
spp := req . GetConfig ( ) . GetLinux ( ) . GetSecurityContext ( ) . GetSeccompProfilePath ( )
g . AddAnnotation ( annotations . SeccompProfilePath , spp )
sb . SetSeccompProfilePath ( spp )
if ! privileged {
if err = s . setupSeccomp ( & g , spp ) ; err != nil {
return nil , err
}
}
2017-08-29 22:57:26 +00:00
err = g . SaveToFile ( filepath . Join ( podContainer . Dir , "config.json" ) , saveOptions )
if err != nil {
return nil , fmt . Errorf ( "failed to save template configuration for pod sandbox %s(%s): %v" , sb . Name ( ) , id , err )
}
if err = g . SaveToFile ( filepath . Join ( podContainer . RunDir , "config.json" ) , saveOptions ) ; err != nil {
return nil , fmt . Errorf ( "failed to write runtime configuration for pod sandbox %s(%s): %v" , sb . Name ( ) , id , err )
}
2017-07-19 19:03:22 +00:00
if err = s . runContainer ( container , sb . CgroupParent ( ) ) ; err != nil {
2016-11-22 22:23:01 +00:00
return nil , err
}
2017-09-01 15:49:57 +00:00
s . addInfraContainer ( container )
2017-07-20 17:05:12 +00:00
s . ContainerStateToDisk ( container )
2017-05-11 10:03:59 +00:00
2017-02-03 14:41:28 +00:00
resp = & pb . RunPodSandboxResponse { PodSandboxId : id }
2016-11-22 22:23:01 +00:00
logrus . Debugf ( "RunPodSandboxResponse: %+v" , resp )
return resp , nil
}
2017-06-15 20:56:17 +00:00
// convertPortMappings translates CRI port mappings into the hostport
// package's representation, copying host port, container port, protocol
// (by its string name) and host IP for each entry. A nil input yields nil;
// otherwise the result has one element per input element, in order.
func convertPortMappings(in []*pb.PortMapping) []*hostport.PortMapping {
	if in == nil {
		return nil
	}

	converted := make([]*hostport.PortMapping, 0, len(in))
	for _, mapping := range in {
		converted = append(converted, &hostport.PortMapping{
			HostPort:      mapping.HostPort,
			ContainerPort: mapping.ContainerPort,
			Protocol:      v1.Protocol(mapping.Protocol.String()),
			HostIP:        mapping.HostIp,
		})
	}
	return converted
}
2017-09-13 08:28:41 +00:00
// getHostname picks the hostname for a sandbox.
//
// An explicitly requested hostname always wins. Otherwise a host-network
// sandbox uses the host's own name (from os.Hostname, whose error is
// returned), and any other sandbox uses the first 12 bytes of id. An id
// shorter than 12 bytes is used whole instead of panicking on the slice.
func getHostname(id, hostname string, hostNetwork bool) (string, error) {
	// Number of leading id bytes used as the default hostname.
	const shortIDLen = 12

	if hostname != "" {
		return hostname, nil
	}

	if hostNetwork {
		h, err := os.Hostname()
		if err != nil {
			return "", err
		}
		return h, nil
	}

	if len(id) > shortIDLen {
		return id[:shortIDLen], nil
	}
	return id, nil
}
2016-10-18 14:48:33 +00:00
func ( s * Server ) setPodSandboxMountLabel ( id , mountLabel string ) error {
2017-07-31 18:38:45 +00:00
storageMetadata , err := s . StorageRuntimeServer ( ) . GetContainerMetadata ( id )
2016-10-18 14:48:33 +00:00
if err != nil {
return err
}
storageMetadata . SetMountLabel ( mountLabel )
2017-07-31 18:38:45 +00:00
return s . StorageRuntimeServer ( ) . SetContainerMetadata ( id , storageMetadata )
2016-10-18 14:48:33 +00:00
}
2017-09-20 13:19:58 +00:00
func getSELinuxLabels ( selinuxOptions * pb . SELinuxOption , privileged bool ) ( processLabel string , mountLabel string , err error ) {
if privileged {
return "" , "" , nil
}
2017-09-13 19:06:54 +00:00
labels := [ ] string { }
2016-11-22 22:23:01 +00:00
if selinuxOptions != nil {
2017-09-13 19:06:54 +00:00
if selinuxOptions . User != "" {
labels = append ( labels , "user:" + selinuxOptions . User )
2016-11-22 22:23:01 +00:00
}
2017-09-13 19:06:54 +00:00
if selinuxOptions . Role != "" {
labels = append ( labels , "role:" + selinuxOptions . Role )
2016-11-22 22:23:01 +00:00
}
2017-09-13 19:06:54 +00:00
if selinuxOptions . Type != "" {
labels = append ( labels , "type:" + selinuxOptions . Type )
2016-11-22 22:23:01 +00:00
}
2017-09-13 19:06:54 +00:00
if selinuxOptions . Level != "" {
labels = append ( labels , "level:" + selinuxOptions . Level )
2016-11-22 22:23:01 +00:00
}
}
2017-09-13 19:06:54 +00:00
return label . InitLabels ( labels )
2016-11-22 22:23:01 +00:00
}
2016-12-08 23:32:17 +00:00
2016-10-18 14:48:33 +00:00
func setupShm ( podSandboxRunDir , mountLabel string ) ( shmPath string , err error ) {
shmPath = filepath . Join ( podSandboxRunDir , "shm" )
2016-12-08 23:32:17 +00:00
if err = os . Mkdir ( shmPath , 0700 ) ; err != nil {
return "" , err
}
2017-07-19 19:03:22 +00:00
shmOptions := "mode=1777,size=" + strconv . Itoa ( sandbox . DefaultShmSize )
2017-06-28 15:47:31 +00:00
if err = unix . Mount ( "shm" , shmPath , "tmpfs" , unix . MS_NOEXEC | unix . MS_NOSUID | unix . MS_NODEV ,
2016-12-09 17:37:47 +00:00
label . FormatMountLabel ( shmOptions , mountLabel ) ) ; err != nil {
2016-12-08 23:32:17 +00:00
return "" , fmt . Errorf ( "failed to mount shm tmpfs for pod: %v" , err )
}
return shmPath , nil
}
2017-05-18 00:45:57 +00:00
2017-09-06 00:59:40 +00:00
// convertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice.
//
// The conversion simply takes the last element of the slash-separated path,
// relying on the systemd convention that the base name already encodes the
// whole hierarchy. No validation is performed and the returned error is
// currently always nil; it is part of the signature so validation can be
// added without changing callers.
func convertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
	// TODO: see if libcontainer systemd implementation could use something similar, and if so, move
	// this function up to that library. At that time, it would most likely do validation specific to systemd
	// above and beyond the simple assumption here that the base of the path encodes the hierarchy
	// per systemd convention.
	return path.Base(cgroupfsName), nil
}