2016-11-22 16:49:54 +00:00
package server
import (
"encoding/json"
"errors"
"fmt"
2017-03-29 18:16:53 +00:00
"io"
"os"
2016-11-22 16:49:54 +00:00
"path/filepath"
2017-03-29 18:23:33 +00:00
"strconv"
2016-11-23 09:41:48 +00:00
"strings"
2017-05-11 09:22:47 +00:00
"time"
2016-11-22 16:49:54 +00:00
2017-08-14 19:29:53 +00:00
"github.com/docker/distribution/reference"
2017-07-14 22:32:25 +00:00
"github.com/docker/docker/pkg/stringid"
2017-03-29 18:16:53 +00:00
"github.com/docker/docker/pkg/symlink"
2017-07-25 19:16:43 +00:00
"github.com/kubernetes-incubator/cri-o/libkpod"
2017-07-19 19:03:22 +00:00
"github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
2016-11-22 16:49:54 +00:00
"github.com/kubernetes-incubator/cri-o/oci"
2017-06-01 16:40:33 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
2017-07-14 22:32:25 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/storage"
2016-11-29 12:34:15 +00:00
"github.com/kubernetes-incubator/cri-o/server/apparmor"
2016-11-23 09:41:48 +00:00
"github.com/kubernetes-incubator/cri-o/server/seccomp"
2017-03-16 09:59:41 +00:00
"github.com/opencontainers/image-spec/specs-go/v1"
2017-07-07 21:43:35 +00:00
"github.com/opencontainers/runc/libcontainer/cgroups"
2017-05-08 22:10:09 +00:00
"github.com/opencontainers/runc/libcontainer/devices"
2017-03-29 18:18:35 +00:00
"github.com/opencontainers/runc/libcontainer/user"
2017-05-08 22:10:09 +00:00
rspec "github.com/opencontainers/runtime-spec/specs-go"
2016-11-22 16:49:54 +00:00
"github.com/opencontainers/runtime-tools/generate"
2017-03-22 17:58:35 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2017-08-05 11:40:46 +00:00
"github.com/sirupsen/logrus"
2016-11-22 16:49:54 +00:00
"golang.org/x/net/context"
2017-06-28 15:47:31 +00:00
"golang.org/x/sys/unix"
2017-08-04 11:13:19 +00:00
pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
2016-11-22 16:49:54 +00:00
)
2016-11-23 09:41:48 +00:00
const (
seccompUnconfined = "unconfined"
seccompRuntimeDefault = "runtime/default"
seccompLocalhostPrefix = "localhost/"
2017-08-29 21:11:30 +00:00
scopePrefix = "crio"
defaultCgroupfsParent = "/crio"
defaultSystemdParent = "system.slice"
2016-11-23 09:41:48 +00:00
)
2017-08-14 19:52:25 +00:00
func addOCIBindMounts ( sb * sandbox . Sandbox , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) ( [ ] oci . ContainerVolume , error ) {
volumes := [ ] oci . ContainerVolume { }
2017-02-22 18:42:44 +00:00
mounts := containerConfig . GetMounts ( )
for _ , mount := range mounts {
dest := mount . ContainerPath
if dest == "" {
2017-08-14 19:52:25 +00:00
return nil , fmt . Errorf ( "Mount.ContainerPath is empty" )
2017-02-22 18:42:44 +00:00
}
src := mount . HostPath
if src == "" {
2017-08-14 19:52:25 +00:00
return nil , fmt . Errorf ( "Mount.HostPath is empty" )
2017-02-22 18:42:44 +00:00
}
2017-06-01 14:09:39 +00:00
if _ , err := os . Stat ( src ) ; err != nil && os . IsNotExist ( err ) {
2017-06-16 22:49:16 +00:00
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
2017-08-14 19:52:25 +00:00
return nil , fmt . Errorf ( "Failed to mkdir %s: %s" , src , err )
2017-06-16 22:49:16 +00:00
}
2017-06-01 14:09:39 +00:00
}
2017-02-22 18:42:44 +00:00
options := [ ] string { "rw" }
if mount . Readonly {
options = [ ] string { "ro" }
}
2017-08-16 14:37:40 +00:00
options = append ( options , [ ] string { "rbind" , "rprivate" } ... )
2017-02-22 18:42:44 +00:00
if mount . SelinuxRelabel {
// Need a way in kubernetes to determine if the volume is shared or private
2017-07-19 19:03:22 +00:00
if err := label . Relabel ( src , sb . MountLabel ( ) , true ) ; err != nil && err != unix . ENOTSUP {
2017-08-14 19:52:25 +00:00
return nil , fmt . Errorf ( "relabel failed %s: %v" , src , err )
2017-02-22 18:42:44 +00:00
}
}
2017-08-14 19:52:25 +00:00
volumes = append ( volumes , oci . ContainerVolume {
ContainerPath : dest ,
HostPath : src ,
Readonly : mount . Readonly ,
} )
2017-02-22 18:42:44 +00:00
specgen . AddBindMount ( src , dest , options )
}
2017-08-14 19:52:25 +00:00
return volumes , nil
2017-02-22 18:42:44 +00:00
}
2017-07-14 22:32:25 +00:00
func addImageVolumes ( rootfs string , s * Server , containerInfo * storage . ContainerInfo , specgen * generate . Generator , mountLabel string ) error {
for dest := range containerInfo . Config . Config . Volumes {
fp , err := symlink . FollowSymlinkInScope ( filepath . Join ( rootfs , dest ) , rootfs )
if err != nil {
return err
}
switch s . config . ImageVolumes {
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesMkdir :
2017-07-14 22:32:25 +00:00
if err1 := os . MkdirAll ( fp , 0644 ) ; err1 != nil {
return err1
}
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesBind :
2017-07-14 22:32:25 +00:00
volumeDirName := stringid . GenerateNonCryptoID ( )
src := filepath . Join ( containerInfo . RunDir , "mounts" , volumeDirName )
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
return err1
}
// Label the source with the sandbox selinux mount label
if mountLabel != "" {
if err1 := label . Relabel ( src , mountLabel , true ) ; err1 != nil && err1 != unix . ENOTSUP {
return fmt . Errorf ( "relabel failed %s: %v" , src , err1 )
}
}
logrus . Debugf ( "Adding bind mounted volume: %s to %s" , src , dest )
specgen . AddBindMount ( src , dest , [ ] string { "rw" } )
2017-07-25 19:16:43 +00:00
case libkpod . ImageVolumesIgnore :
2017-07-14 22:32:25 +00:00
logrus . Debugf ( "Ignoring volume %v" , dest )
default :
logrus . Fatalf ( "Unrecognized image volumes setting" )
}
}
return nil
}
2017-09-06 15:04:18 +00:00
// resolveSymbolicLink resolves a possbile symlink path. If the path is a symlink, returns resolved
// path; if not, returns the original path.
func resolveSymbolicLink ( path string ) ( string , error ) {
info , err := os . Lstat ( path )
if err != nil {
return "" , err
}
if info . Mode ( ) & os . ModeSymlink != os . ModeSymlink {
return path , nil
}
return filepath . EvalSymlinks ( path )
}
2017-07-19 19:03:22 +00:00
func addDevices ( sb * sandbox . Sandbox , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) error {
2017-05-08 22:10:09 +00:00
sp := specgen . Spec ( )
2017-09-06 15:04:18 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
hostDevices , err := devices . HostDevices ( )
if err != nil {
return err
}
for _ , hostDevice := range hostDevices {
rd := rspec . LinuxDevice {
Path : hostDevice . Path ,
Type : string ( hostDevice . Type ) ,
Major : hostDevice . Major ,
Minor : hostDevice . Minor ,
UID : & hostDevice . Uid ,
GID : & hostDevice . Gid ,
}
if hostDevice . Major == 0 && hostDevice . Minor == 0 {
// Invalid device, most likely a symbolic link, skip it.
continue
}
specgen . AddDevice ( rd )
}
sp . Linux . Resources . Devices = [ ] rspec . LinuxDeviceCgroup {
{
Allow : true ,
Access : "rwm" ,
} ,
}
return nil
}
2017-05-08 22:10:09 +00:00
for _ , device := range containerConfig . GetDevices ( ) {
2017-09-06 15:04:18 +00:00
path , err := resolveSymbolicLink ( device . HostPath )
2017-05-08 22:10:09 +00:00
if err != nil {
2017-09-06 15:04:18 +00:00
return err
2017-05-08 22:10:09 +00:00
}
2017-09-06 15:04:18 +00:00
dev , err := devices . DeviceFromPath ( path , device . Permissions )
// if there was no error, return the device
if err == nil {
rd := rspec . LinuxDevice {
Path : device . ContainerPath ,
Type : string ( dev . Type ) ,
Major : dev . Major ,
Minor : dev . Minor ,
UID : & dev . Uid ,
GID : & dev . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( dev . Type ) ,
Major : & dev . Major ,
Minor : & dev . Minor ,
Access : dev . Permissions ,
} )
continue
}
// if the device is not a device node
// try to see if it's a directory holding many devices
if err == devices . ErrNotADevice {
// check if it is a directory
if src , e := os . Stat ( path ) ; e == nil && src . IsDir ( ) {
// mount the internal devices recursively
filepath . Walk ( path , func ( dpath string , f os . FileInfo , e error ) error {
childDevice , e := devices . DeviceFromPath ( dpath , device . Permissions )
if e != nil {
// ignore the device
return nil
}
cPath := strings . Replace ( dpath , path , device . ContainerPath , 1 )
rd := rspec . LinuxDevice {
Path : cPath ,
Type : string ( childDevice . Type ) ,
Major : childDevice . Major ,
Minor : childDevice . Minor ,
UID : & childDevice . Uid ,
GID : & childDevice . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( childDevice . Type ) ,
Major : & childDevice . Major ,
Minor : & childDevice . Minor ,
Access : childDevice . Permissions ,
} )
return nil
} )
}
2017-05-08 22:10:09 +00:00
}
}
return nil
}
2017-03-16 09:59:41 +00:00
// buildOCIProcessArgs build an OCI compatible process arguments slice.
func buildOCIProcessArgs ( containerKubeConfig * pb . ContainerConfig , imageOCIConfig * v1 . Image ) ( [ ] string , error ) {
2017-06-07 16:38:04 +00:00
//# Start the nginx container using the default command, but use custom
//arguments (arg1 .. argN) for that command.
//kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN>
//# Start the nginx container using a different command and custom arguments.
//kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN>
2017-03-16 09:59:41 +00:00
kubeCommands := containerKubeConfig . Command
kubeArgs := containerKubeConfig . Args
2017-06-07 16:38:04 +00:00
// merge image config and kube config
// same as docker does today...
if imageOCIConfig != nil {
if len ( kubeCommands ) == 0 {
if len ( kubeArgs ) == 0 {
kubeArgs = imageOCIConfig . Config . Cmd
}
if kubeCommands == nil {
kubeCommands = imageOCIConfig . Config . Entrypoint
}
}
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
if len ( kubeCommands ) == 0 && len ( kubeArgs ) == 0 {
return nil , fmt . Errorf ( "no command specified" )
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
// create entrypoint and args
var entrypoint string
var args [ ] string
if len ( kubeCommands ) != 0 {
entrypoint = kubeCommands [ 0 ]
args = append ( kubeCommands [ 1 : ] , kubeArgs ... )
2017-03-16 09:59:41 +00:00
} else {
2017-06-07 16:38:04 +00:00
entrypoint = kubeArgs [ 0 ]
args = kubeArgs [ 1 : ]
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
processArgs := append ( [ ] string { entrypoint } , args ... )
2017-03-16 09:59:41 +00:00
logrus . Debugf ( "OCI process args %v" , processArgs )
return processArgs , nil
}
2017-03-29 18:23:33 +00:00
// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config
func setupContainerUser ( specgen * generate . Generator , rootfs string , sc * pb . LinuxContainerSecurityContext , imageConfig * v1 . Image ) error {
if sc != nil {
containerUser := ""
// Case 1: run as user is set by kubelet
if sc . RunAsUser != nil {
containerUser = strconv . FormatInt ( sc . GetRunAsUser ( ) . Value , 10 )
} else {
// Case 2: run as username is set by kubelet
userName := sc . RunAsUsername
if userName != "" {
containerUser = userName
} else {
// Case 3: get user from image config
2017-04-04 22:39:59 +00:00
if imageConfig != nil {
imageUser := imageConfig . Config . User
if imageUser != "" {
containerUser = imageUser
}
2017-03-29 18:23:33 +00:00
}
}
}
logrus . Debugf ( "CONTAINER USER: %+v" , containerUser )
// Add uid, gid and groups from user
uid , gid , addGroups , err1 := getUserInfo ( rootfs , containerUser )
if err1 != nil {
return err1
}
logrus . Debugf ( "UID: %v, GID: %v, Groups: %+v" , uid , gid , addGroups )
specgen . SetProcessUID ( uid )
specgen . SetProcessGID ( gid )
for _ , group := range addGroups {
specgen . AddProcessAdditionalGid ( group )
}
// Add groups from CRI
groups := sc . SupplementalGroups
for _ , group := range groups {
specgen . AddProcessAdditionalGid ( uint32 ( group ) )
}
}
return nil
}
2017-04-22 00:54:29 +00:00
func hostNetwork ( containerConfig * pb . ContainerConfig ) bool {
securityContext := containerConfig . GetLinux ( ) . GetSecurityContext ( )
if securityContext == nil || securityContext . GetNamespaceOptions ( ) == nil {
return false
}
return securityContext . GetNamespaceOptions ( ) . HostNetwork
}
2017-04-04 14:11:53 +00:00
// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
// logPath to be a broken symlink to some magical Docker path. Ideally we
// wouldn't have to deal with this, but until that issue is fixed we have to
// remove the path if it's a broken symlink.
func ensureSaneLogPath ( logPath string ) error {
// If the path exists but the resolved path does not, then we have a broken
// symlink and we need to remove it.
fi , err := os . Lstat ( logPath )
if err != nil || fi . Mode ( ) & os . ModeSymlink == 0 {
2017-08-15 02:15:01 +00:00
// Non-existent files and non-symlinks aren't our problem.
2017-04-04 14:11:53 +00:00
return nil
}
_ , err = os . Stat ( logPath )
if os . IsNotExist ( err ) {
err = os . RemoveAll ( logPath )
if err != nil {
return fmt . Errorf ( "ensureSaneLogPath remove bad logPath: %s" , err )
}
}
return nil
}
2016-11-22 16:49:54 +00:00
// CreateContainer creates a new container in specified PodSandbox
func ( s * Server ) CreateContainer ( ctx context . Context , req * pb . CreateContainerRequest ) ( res * pb . CreateContainerResponse , err error ) {
logrus . Debugf ( "CreateContainerRequest %+v" , req )
2017-04-04 15:24:55 +00:00
s . updateLock . RLock ( )
defer s . updateLock . RUnlock ( )
2017-02-03 14:41:28 +00:00
sbID := req . PodSandboxId
2016-11-22 16:49:54 +00:00
if sbID == "" {
return nil , fmt . Errorf ( "PodSandboxId should not be empty" )
}
2017-07-25 15:36:33 +00:00
sandboxID , err := s . PodIDIndex ( ) . Get ( sbID )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , fmt . Errorf ( "PodSandbox with ID starting with %s not found: %v" , sbID , err )
}
sb := s . getSandbox ( sandboxID )
if sb == nil {
return nil , fmt . Errorf ( "specified sandbox not found: %s" , sandboxID )
}
// The config of the container
containerConfig := req . GetConfig ( )
if containerConfig == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig is nil" )
}
2017-02-03 14:41:28 +00:00
name := containerConfig . GetMetadata ( ) . Name
2016-11-22 16:49:54 +00:00
if name == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Name is empty" )
}
2017-07-19 19:03:22 +00:00
containerID , containerName , err := s . generateContainerIDandName ( sb . Metadata ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
2017-07-20 17:10:16 +00:00
s . ReleaseContainerName ( containerName )
2016-11-22 16:49:54 +00:00
}
} ( )
2016-10-18 14:48:33 +00:00
container , err := s . createSandboxContainer ( ctx , containerID , containerName , sb , req . GetSandboxConfig ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2016-10-18 14:48:33 +00:00
defer func ( ) {
if err != nil {
2017-07-31 18:38:45 +00:00
err2 := s . StorageRuntimeServer ( ) . DeleteContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err2 != nil {
logrus . Warnf ( "Failed to cleanup container directory: %v" , err2 )
}
}
} ( )
2016-11-22 16:49:54 +00:00
2017-07-19 19:03:22 +00:00
if err = s . Runtime ( ) . CreateContainer ( container , sb . CgroupParent ( ) ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
s . addContainer ( container )
2017-07-17 12:25:32 +00:00
if err = s . CtrIDIndex ( ) . Add ( containerID ) ; err != nil {
2016-11-22 16:49:54 +00:00
s . removeContainer ( container )
return nil , err
}
2017-07-20 17:05:12 +00:00
s . ContainerStateToDisk ( container )
2017-05-11 10:03:59 +00:00
2016-11-22 16:49:54 +00:00
resp := & pb . CreateContainerResponse {
2017-02-03 14:41:28 +00:00
ContainerId : containerID ,
2016-11-22 16:49:54 +00:00
}
logrus . Debugf ( "CreateContainerResponse: %+v" , resp )
return resp , nil
}
2017-07-19 19:03:22 +00:00
func ( s * Server ) createSandboxContainer ( ctx context . Context , containerID string , containerName string , sb * sandbox . Sandbox , SandboxConfig * pb . PodSandboxConfig , containerConfig * pb . ContainerConfig ) ( * oci . Container , error ) {
2016-11-22 16:49:54 +00:00
if sb == nil {
return nil , errors . New ( "createSandboxContainer needs a sandbox" )
}
2016-10-18 14:48:33 +00:00
2017-02-03 14:41:28 +00:00
// TODO: simplify this function (cyclomatic complexity here is high)
2016-10-18 14:48:33 +00:00
// TODO: factor generating/updating the spec into something other projects can vendor
2016-11-22 16:49:54 +00:00
// creates a spec Generator with the default spec.
specgen := generate . New ( )
2017-05-30 15:04:21 +00:00
specgen . HostSpecific = true
2017-07-03 21:49:34 +00:00
specgen . ClearProcessRlimits ( )
2016-11-22 16:49:54 +00:00
2017-08-14 19:52:25 +00:00
containerVolumes , err := addOCIBindMounts ( sb , containerConfig , & specgen )
if err != nil {
return nil , err
}
volumesJSON , err := json . Marshal ( containerVolumes )
if err != nil {
2017-02-22 18:42:44 +00:00
return nil , err
2016-11-22 16:49:54 +00:00
}
2017-08-14 19:52:25 +00:00
specgen . AddAnnotation ( annotations . Volumes , string ( volumesJSON ) )
2016-11-22 16:49:54 +00:00
2017-07-07 23:32:37 +00:00
// Add cgroup mount so container process can introspect its own limits
specgen . AddCgroupsMount ( "ro" )
2017-05-08 22:11:36 +00:00
if err := addDevices ( sb , containerConfig , & specgen ) ; err != nil {
return nil , err
}
2016-11-22 16:49:54 +00:00
labels := containerConfig . GetLabels ( )
metadata := containerConfig . GetMetadata ( )
2017-06-01 16:40:33 +00:00
kubeAnnotations := containerConfig . GetAnnotations ( )
if kubeAnnotations != nil {
for k , v := range kubeAnnotations {
2016-11-22 16:49:54 +00:00
specgen . AddAnnotation ( k , v )
}
}
2017-09-04 16:11:32 +00:00
if labels != nil {
for k , v := range labels {
specgen . AddAnnotation ( k , v )
}
}
2016-11-24 13:27:56 +00:00
2017-09-06 11:25:19 +00:00
var readOnlyRootfs bool
var privileged bool
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil {
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
privileged = true
}
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . ReadonlyRootfs {
readOnlyRootfs = true
specgen . SetRootReadonly ( true )
}
}
2016-11-24 13:27:56 +00:00
// set this container's apparmor profile if it is set by sandbox
2017-09-06 11:25:19 +00:00
if s . appArmorEnabled && ! privileged {
2017-07-19 19:03:22 +00:00
appArmorProfileName := s . getAppArmorProfileName ( sb . Annotations ( ) , metadata . Name )
2016-11-29 12:34:15 +00:00
if appArmorProfileName != "" {
2016-12-12 07:55:17 +00:00
// reload default apparmor profile if it is unloaded.
if s . appArmorProfile == apparmor . DefaultApparmorProfile {
if err := apparmor . EnsureDefaultApparmorProfile ( ) ; err != nil {
return nil , err
}
}
2016-11-29 12:34:15 +00:00
specgen . SetProcessApparmorProfile ( appArmorProfileName )
}
2016-11-24 13:27:56 +00:00
}
2016-11-22 16:49:54 +00:00
2017-02-03 14:41:28 +00:00
logPath := containerConfig . LogPath
2016-10-07 15:59:39 +00:00
if logPath == "" {
// TODO: Should we use sandboxConfig.GetLogDirectory() here?
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , containerID + ".log" )
2016-10-07 15:59:39 +00:00
}
if ! filepath . IsAbs ( logPath ) {
// XXX: It's not really clear what this should be versus the sbox logDirectory.
logrus . Warnf ( "requested logPath for ctr id %s is a relative path: %s" , containerID , logPath )
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , logPath )
2016-10-07 15:59:39 +00:00
}
2017-04-04 14:11:53 +00:00
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath ( logPath ) ; err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
logrus . WithFields ( logrus . Fields {
2017-07-19 19:03:22 +00:00
"sbox.logdir" : sb . LogDir ( ) ,
2016-10-07 15:59:39 +00:00
"ctr.logfile" : containerConfig . LogPath ,
"log_path" : logPath ,
} ) . Debugf ( "setting container's log_path" )
2017-02-03 14:41:28 +00:00
specgen . SetProcessTerminal ( containerConfig . Tty )
2017-09-06 14:38:01 +00:00
if containerConfig . Tty {
specgen . AddProcessEnv ( "TERM" , "xterm" )
}
2016-11-22 16:49:54 +00:00
linux := containerConfig . GetLinux ( )
if linux != nil {
resources := linux . GetResources ( )
if resources != nil {
2017-02-03 14:41:28 +00:00
cpuPeriod := resources . CpuPeriod
2016-11-22 16:49:54 +00:00
if cpuPeriod != 0 {
specgen . SetLinuxResourcesCPUPeriod ( uint64 ( cpuPeriod ) )
}
2017-02-03 14:41:28 +00:00
cpuQuota := resources . CpuQuota
2016-11-22 16:49:54 +00:00
if cpuQuota != 0 {
2017-04-12 23:12:04 +00:00
specgen . SetLinuxResourcesCPUQuota ( cpuQuota )
2016-11-22 16:49:54 +00:00
}
2017-02-03 14:41:28 +00:00
cpuShares := resources . CpuShares
2016-11-22 16:49:54 +00:00
if cpuShares != 0 {
specgen . SetLinuxResourcesCPUShares ( uint64 ( cpuShares ) )
}
2017-02-03 14:41:28 +00:00
memoryLimit := resources . MemoryLimitInBytes
2016-11-22 16:49:54 +00:00
if memoryLimit != 0 {
2017-07-20 04:07:01 +00:00
specgen . SetLinuxResourcesMemoryLimit ( memoryLimit )
2016-11-22 16:49:54 +00:00
}
2017-02-03 14:41:28 +00:00
oomScoreAdj := resources . OomScoreAdj
2017-07-20 04:07:01 +00:00
specgen . SetProcessOOMScoreAdj ( int ( oomScoreAdj ) )
2016-11-22 16:49:54 +00:00
}
2017-08-29 15:52:05 +00:00
var cgPath string
2017-08-29 21:11:30 +00:00
parent := defaultCgroupfsParent
useSystemd := s . config . CgroupManager == oci . SystemdCgroupsManager
2017-08-29 15:52:05 +00:00
if useSystemd {
2017-08-29 21:11:30 +00:00
parent = defaultSystemdParent
2017-08-29 15:52:05 +00:00
}
2017-07-19 19:03:22 +00:00
if sb . CgroupParent ( ) != "" {
2017-08-29 15:52:05 +00:00
parent = sb . CgroupParent ( )
}
if useSystemd {
cgPath = parent + ":" + scopePrefix + ":" + containerID
} else {
cgPath = filepath . Join ( parent , scopePrefix + "-" + containerID )
2016-12-13 08:34:55 +00:00
}
2017-08-29 15:52:05 +00:00
specgen . SetLinuxCgroupsPath ( cgPath )
2017-08-29 17:09:59 +00:00
sb . UpdateCgroupParent ( parent )
2016-12-13 08:34:55 +00:00
2016-11-22 16:49:54 +00:00
capabilities := linux . GetSecurityContext ( ) . GetCapabilities ( )
2017-09-06 11:25:19 +00:00
if privileged {
// this is setting correct capabilities as well for privileged mode
specgen . SetupPrivileged ( true )
} else {
toCAPPrefixed := func ( cap string ) string {
if ! strings . HasPrefix ( strings . ToLower ( cap ) , "cap_" ) {
return "CAP_" + strings . ToUpper ( cap )
}
return cap
2017-05-05 10:14:34 +00:00
}
2017-09-06 11:25:19 +00:00
// Add/drop all capabilities if "all" is specified, so that
// following individual add/drop could still work. E.g.
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
// will be all capabilities without `CAP_CHOWN`.
// see https://github.com/kubernetes/kubernetes/issues/51980
if inStringSlice ( capabilities . GetAddCapabilities ( ) , "ALL" ) {
for _ , c := range getOCICapabilitiesList ( ) {
if err := specgen . AddProcessCapability ( c ) ; err != nil {
return nil , err
}
}
}
if inStringSlice ( capabilities . GetDropCapabilities ( ) , "ALL" ) {
for _ , c := range getOCICapabilitiesList ( ) {
if err := specgen . DropProcessCapability ( c ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
}
}
2017-09-06 11:25:19 +00:00
if capabilities != nil {
for _ , cap := range capabilities . GetAddCapabilities ( ) {
if strings . ToUpper ( cap ) == "ALL" {
continue
}
if err := specgen . AddProcessCapability ( toCAPPrefixed ( cap ) ) ; err != nil {
return nil , err
}
}
for _ , cap := range capabilities . GetDropCapabilities ( ) {
if strings . ToUpper ( cap ) == "ALL" {
continue
}
2017-05-05 10:14:34 +00:00
if err := specgen . DropProcessCapability ( toCAPPrefixed ( cap ) ) ; err != nil {
2017-09-06 11:25:19 +00:00
return nil , fmt . Errorf ( "failed to drop cap %s %v" , toCAPPrefixed ( cap ) , err )
2016-11-22 16:49:54 +00:00
}
}
}
2017-09-06 11:25:19 +00:00
specgen . SetProcessSelinuxLabel ( sb . ProcessLabel ( ) )
2016-11-22 16:49:54 +00:00
}
2017-07-19 19:03:22 +00:00
specgen . SetLinuxMountLabel ( sb . MountLabel ( ) )
2016-11-22 16:49:54 +00:00
2017-05-31 00:03:54 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil &&
2017-06-16 22:49:16 +00:00
! containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
2017-05-31 00:03:54 +00:00
for _ , mp := range [ ] string {
"/proc/kcore" ,
"/proc/latency_stats" ,
"/proc/timer_list" ,
"/proc/timer_stats" ,
"/proc/sched_debug" ,
"/sys/firmware" ,
} {
specgen . AddLinuxMaskedPaths ( mp )
}
2017-05-12 10:47:40 +00:00
2017-05-31 00:03:54 +00:00
for _ , rp := range [ ] string {
"/proc/asound" ,
"/proc/bus" ,
"/proc/fs" ,
"/proc/irq" ,
"/proc/sys" ,
"/proc/sysrq-trigger" ,
} {
specgen . AddLinuxReadonlyPaths ( rp )
}
2017-05-12 10:47:40 +00:00
}
2016-11-22 16:49:54 +00:00
}
// Join the namespace paths for the pod sandbox container.
2017-07-19 19:03:22 +00:00
podInfraState := s . Runtime ( ) . ContainerStatus ( sb . InfraContainer ( ) )
2016-11-22 16:49:54 +00:00
logrus . Debugf ( "pod container state %+v" , podInfraState )
2016-11-23 17:16:21 +00:00
ipcNsPath := fmt . Sprintf ( "/proc/%d/ns/ipc" , podInfraState . Pid )
if err := specgen . AddOrReplaceLinuxNamespace ( "ipc" , ipcNsPath ) ; err != nil {
return nil , err
}
2017-07-18 20:35:15 +00:00
netNsPath := sb . NetNsPath ( )
2016-11-23 17:16:21 +00:00
if netNsPath == "" {
// The sandbox does not have a permanent namespace,
// it's on the host one.
netNsPath = fmt . Sprintf ( "/proc/%d/ns/net" , podInfraState . Pid )
}
if err := specgen . AddOrReplaceLinuxNamespace ( "network" , netNsPath ) ; err != nil {
return nil , err
2016-11-22 16:49:54 +00:00
}
2016-12-12 10:12:03 +00:00
imageSpec := containerConfig . GetImage ( )
if imageSpec == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image is nil" )
}
2017-02-03 14:41:28 +00:00
image := imageSpec . Image
2016-12-12 10:12:03 +00:00
if image == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image.Image is empty" )
}
2017-07-20 08:01:23 +00:00
images , err := s . StorageImageServer ( ) . ResolveNames ( image )
if err != nil {
// This means we got an image ID
if strings . Contains ( err . Error ( ) , "cannot specify 64-byte hexadecimal strings" ) {
images = append ( images , image )
} else {
return nil , err
}
}
image = images [ 0 ]
2016-12-12 10:12:03 +00:00
2017-08-14 19:29:53 +00:00
// Get imageName and imageRef that are requested in container status
imageName := image
status , err := s . StorageImageServer ( ) . ImageStatus ( s . ImageContext ( ) , image )
if err != nil {
return nil , err
}
imageRef := status . ID
//
// TODO: https://github.com/kubernetes-incubator/cri-o/issues/531
//
//for _, n := range status.Names {
//r, err := reference.ParseNormalizedNamed(n)
//if err != nil {
//return nil, fmt.Errorf("failed to normalize image name for ImageRef: %v", err)
//}
//if digested, isDigested := r.(reference.Canonical); isDigested {
//imageRef = reference.FamiliarString(digested)
//break
//}
//}
for _ , n := range status . Names {
r , err := reference . ParseNormalizedNamed ( n )
if err != nil {
return nil , fmt . Errorf ( "failed to normalize image name for Image: %v" , err )
}
if tagged , isTagged := r . ( reference . Tagged ) ; isTagged {
imageName = reference . FamiliarString ( tagged )
break
}
}
specgen . AddAnnotation ( annotations . ImageName , imageName )
specgen . AddAnnotation ( annotations . ImageRef , imageRef )
2017-08-29 23:00:49 +00:00
specgen . AddAnnotation ( annotations . IP , sb . IP ( ) )
2017-08-14 19:29:53 +00:00
2016-12-08 23:32:17 +00:00
// bind mount the pod shm
2017-07-19 19:03:22 +00:00
specgen . AddBindMount ( sb . ShmPath ( ) , "/dev/shm" , [ ] string { "rw" } )
2016-12-08 23:32:17 +00:00
2017-06-14 13:28:13 +00:00
options := [ ] string { "rw" }
if readOnlyRootfs {
options = [ ] string { "ro" }
}
2017-07-19 19:03:22 +00:00
if sb . ResolvPath ( ) != "" {
2017-03-24 14:32:16 +00:00
// bind mount the pod resolver file
2017-07-19 19:03:22 +00:00
specgen . AddBindMount ( sb . ResolvPath ( ) , "/etc/resolv.conf" , options )
2017-03-24 14:32:16 +00:00
}
2017-04-22 00:54:29 +00:00
// Bind mount /etc/hosts for host networking containers
if hostNetwork ( containerConfig ) {
2017-06-14 13:28:13 +00:00
specgen . AddBindMount ( "/etc/hosts" , "/etc/hosts" , options )
2017-04-22 00:54:29 +00:00
}
2017-07-19 19:03:22 +00:00
if sb . Hostname ( ) != "" {
specgen . SetHostname ( sb . Hostname ( ) )
2017-03-29 23:11:57 +00:00
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Name , containerName )
2017-06-23 16:31:13 +00:00
specgen . AddAnnotation ( annotations . ContainerID , containerID )
2017-07-19 19:03:22 +00:00
specgen . AddAnnotation ( annotations . SandboxID , sb . ID ( ) )
specgen . AddAnnotation ( annotations . SandboxName , sb . InfraContainer ( ) . Name ( ) )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . ContainerType , annotations . ContainerTypeContainer )
specgen . AddAnnotation ( annotations . LogPath , logPath )
specgen . AddAnnotation ( annotations . TTY , fmt . Sprintf ( "%v" , containerConfig . Tty ) )
2017-06-08 20:08:29 +00:00
specgen . AddAnnotation ( annotations . Stdin , fmt . Sprintf ( "%v" , containerConfig . Stdin ) )
specgen . AddAnnotation ( annotations . StdinOnce , fmt . Sprintf ( "%v" , containerConfig . StdinOnce ) )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Image , image )
2016-11-22 16:49:54 +00:00
2017-05-11 09:22:47 +00:00
created := time . Now ( )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Created , created . Format ( time . RFC3339Nano ) )
2017-05-11 09:22:47 +00:00
2016-11-22 16:49:54 +00:00
metadataJSON , err := json . Marshal ( metadata )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Metadata , string ( metadataJSON ) )
2016-11-22 16:49:54 +00:00
labelsJSON , err := json . Marshal ( labels )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Labels , string ( labelsJSON ) )
2016-11-22 16:49:54 +00:00
2017-06-01 16:40:33 +00:00
kubeAnnotationsJSON , err := json . Marshal ( kubeAnnotations )
2016-12-12 17:55:34 +00:00
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Annotations , string ( kubeAnnotationsJSON ) )
2016-12-12 17:55:34 +00:00
2017-09-06 11:25:19 +00:00
if ! privileged {
if err = s . setupSeccomp ( & specgen , containerName , sb . Annotations ( ) ) ; err != nil {
return nil , err
}
2016-11-23 09:41:48 +00:00
}
2017-02-03 14:41:28 +00:00
metaname := metadata . Name
attempt := metadata . Attempt
2017-07-31 18:38:45 +00:00
containerInfo , err := s . StorageRuntimeServer ( ) . CreateContainer ( s . ImageContext ( ) ,
2017-07-19 19:03:22 +00:00
sb . Name ( ) , sb . ID ( ) ,
2017-01-31 15:32:27 +00:00
image , image ,
2016-10-18 14:48:33 +00:00
containerName , containerID ,
metaname ,
attempt ,
2017-07-19 19:03:22 +00:00
sb . MountLabel ( ) ,
2016-10-18 14:48:33 +00:00
nil )
if err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-07-31 18:38:45 +00:00
mountPoint , err := s . StorageRuntimeServer ( ) . StartContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to mount container %s(%s): %v" , containerName , containerID , err )
}
2017-08-31 13:16:25 +00:00
specgen . AddAnnotation ( annotations . MountPoint , mountPoint )
2016-10-18 14:48:33 +00:00
2017-04-04 22:39:59 +00:00
containerImageConfig := containerInfo . Config
2017-06-07 16:38:04 +00:00
if containerImageConfig == nil {
return nil , fmt . Errorf ( "empty image config for %s" , image )
}
2017-04-04 22:39:59 +00:00
2017-05-26 16:31:28 +00:00
if containerImageConfig . Config . StopSignal != "" {
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
specgen . AddAnnotation ( "org.opencontainers.image.stopSignal" , containerImageConfig . Config . StopSignal )
}
2017-07-14 22:32:25 +00:00
// Add image volumes
2017-07-19 19:03:22 +00:00
if err := addImageVolumes ( mountPoint , s , & containerInfo , & specgen , sb . MountLabel ( ) ) ; err != nil {
2017-07-14 22:32:25 +00:00
return nil , err
2017-05-22 15:19:49 +00:00
}
2017-04-04 22:39:59 +00:00
processArgs , err := buildOCIProcessArgs ( containerConfig , containerImageConfig )
2017-03-16 09:59:41 +00:00
if err != nil {
return nil , err
2016-10-18 14:48:33 +00:00
}
specgen . SetProcessArgs ( processArgs )
2017-03-27 22:53:47 +00:00
// Add environment variables from CRI and image config
envs := containerConfig . GetEnvs ( )
if envs != nil {
for _ , item := range envs {
key := item . Key
value := item . Value
if key == "" {
continue
}
specgen . AddProcessEnv ( key , value )
}
}
2017-04-04 22:39:59 +00:00
if containerImageConfig != nil {
for _ , item := range containerImageConfig . Config . Env {
parts := strings . SplitN ( item , "=" , 2 )
if len ( parts ) != 2 {
return nil , fmt . Errorf ( "invalid env from image: %s" , item )
}
2017-03-27 22:53:47 +00:00
2017-04-04 22:39:59 +00:00
if parts [ 0 ] == "" {
continue
}
specgen . AddProcessEnv ( parts [ 0 ] , parts [ 1 ] )
2017-03-27 22:53:47 +00:00
}
}
// Set working directory
// Pick it up from image config first and override if specified in CRI
2017-03-31 21:04:16 +00:00
containerCwd := "/"
2017-04-04 22:39:59 +00:00
if containerImageConfig != nil {
imageCwd := containerImageConfig . Config . WorkingDir
if imageCwd != "" {
containerCwd = imageCwd
}
2017-03-31 21:04:16 +00:00
}
runtimeCwd := containerConfig . WorkingDir
if runtimeCwd != "" {
containerCwd = runtimeCwd
2017-03-27 22:53:47 +00:00
}
2017-03-31 21:04:16 +00:00
specgen . SetProcessCwd ( containerCwd )
2017-03-27 22:53:47 +00:00
2017-03-29 18:23:33 +00:00
// Setup user and groups
if linux != nil {
2017-04-04 22:39:59 +00:00
if err = setupContainerUser ( & specgen , mountPoint , linux . GetSecurityContext ( ) , containerImageConfig ) ; err != nil {
2017-03-29 18:23:33 +00:00
return nil , err
}
}
2017-07-07 21:43:35 +00:00
// Set up pids limit if pids cgroup is mounted
_ , err = cgroups . FindCgroupMountpoint ( "pids" )
if err == nil {
specgen . SetLinuxResourcesPidsLimit ( s . config . PidsLimit )
}
2016-10-18 14:48:33 +00:00
// by default, the root path is an empty string. set it now.
specgen . SetRootPath ( mountPoint )
saveOptions := generate . ExportOptions { }
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . Dir , "config.json" ) , saveOptions ) ; err != nil {
return nil , err
}
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . RunDir , "config.json" ) , saveOptions ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-08-14 19:29:53 +00:00
container , err := oci . NewContainer ( containerID , containerName , containerInfo . RunDir , logPath , sb . NetNs ( ) , labels , kubeAnnotations , image , imageName , imageRef , metadata , sb . ID ( ) , containerConfig . Tty , containerConfig . Stdin , containerConfig . StdinOnce , sb . Privileged ( ) , sb . Trusted ( ) , containerInfo . Dir , created , containerImageConfig . Config . StopSignal )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2017-08-31 13:16:25 +00:00
container . SetMountPoint ( mountPoint )
2016-11-22 16:49:54 +00:00
2017-08-14 19:52:25 +00:00
for _ , cv := range containerVolumes {
container . AddVolume ( cv )
}
2016-11-22 16:49:54 +00:00
return container , nil
}
2016-11-23 09:41:48 +00:00
func ( s * Server ) setupSeccomp ( specgen * generate . Generator , cname string , sbAnnotations map [ string ] string ) error {
profile , ok := sbAnnotations [ "security.alpha.kubernetes.io/seccomp/container/" + cname ]
if ! ok {
profile , ok = sbAnnotations [ "security.alpha.kubernetes.io/seccomp/pod" ]
if ! ok {
// running w/o seccomp, aka unconfined
profile = seccompUnconfined
}
}
if ! s . seccompEnabled {
if profile != seccompUnconfined {
return fmt . Errorf ( "seccomp is not enabled in your kernel, cannot run with a profile" )
}
logrus . Warn ( "seccomp is not enabled in your kernel, running container without profile" )
}
if profile == seccompUnconfined {
// running w/o seccomp, aka unconfined
specgen . Spec ( ) . Linux . Seccomp = nil
return nil
}
if profile == seccompRuntimeDefault {
return seccomp . LoadProfileFromStruct ( s . seccompProfile , specgen )
}
if ! strings . HasPrefix ( profile , seccompLocalhostPrefix ) {
return fmt . Errorf ( "unknown seccomp profile option: %q" , profile )
}
//file, err := ioutil.ReadFile(filepath.Join(s.seccompProfileRoot, strings.TrimPrefix(profile, seccompLocalhostPrefix)))
//if err != nil {
//return err
//}
// TODO(runcom): setup from provided node's seccomp profile
// can't do this yet, see https://issues.k8s.io/36997
return nil
}
2016-11-30 08:19:36 +00:00
// getAppArmorProfileName gets the profile name for the given container.
func ( s * Server ) getAppArmorProfileName ( annotations map [ string ] string , ctrName string ) string {
profile := apparmor . GetProfileNameFromPodAnnotations ( annotations , ctrName )
if profile == "" {
return ""
}
if profile == apparmor . ProfileRuntimeDefault {
// If the value is runtime/default, then return default profile.
return s . appArmorProfile
}
2016-12-02 07:13:41 +00:00
return strings . TrimPrefix ( profile , apparmor . ProfileNamePrefix )
2016-11-30 08:19:36 +00:00
}
2017-03-29 18:16:53 +00:00
// openContainerFile opens a file inside a container rootfs safely
func openContainerFile ( rootfs string , path string ) ( io . ReadCloser , error ) {
fp , err := symlink . FollowSymlinkInScope ( filepath . Join ( rootfs , path ) , rootfs )
if err != nil {
return nil , err
}
return os . Open ( fp )
}
2017-03-29 18:18:35 +00:00
// getUserInfo returns UID, GID and additional groups for specified user
// by looking them up in /etc/passwd and /etc/group
func getUserInfo ( rootfs string , userName string ) ( uint32 , uint32 , [ ] uint32 , error ) {
// We don't care if we can't open the file because
// not all images will have these files
passwdFile , err := openContainerFile ( rootfs , "/etc/passwd" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/passwd: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer passwdFile . Close ( )
}
2017-03-29 18:23:33 +00:00
2017-03-29 18:18:35 +00:00
groupFile , err := openContainerFile ( rootfs , "/etc/group" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/group: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer groupFile . Close ( )
}
execUser , err := user . GetExecUser ( userName , nil , passwdFile , groupFile )
if err != nil {
return 0 , 0 , nil , err
}
uid := uint32 ( execUser . Uid )
gid := uint32 ( execUser . Gid )
var additionalGids [ ] uint32
for _ , g := range execUser . Sgids {
additionalGids = append ( additionalGids , uint32 ( g ) )
}
return uid , gid , additionalGids , nil
}