2016-11-22 16:49:54 +00:00
package server
import (
"encoding/json"
"errors"
"fmt"
2017-03-29 18:16:53 +00:00
"io"
2017-09-27 18:46:31 +00:00
"io/ioutil"
2017-03-29 18:16:53 +00:00
"os"
2016-11-22 16:49:54 +00:00
"path/filepath"
2017-08-12 10:29:22 +00:00
"regexp"
2017-10-28 21:43:20 +00:00
"sort"
2017-03-29 18:23:33 +00:00
"strconv"
2016-11-23 09:41:48 +00:00
"strings"
2017-05-11 09:22:47 +00:00
"time"
2016-11-22 16:49:54 +00:00
2017-09-27 12:49:29 +00:00
dockermounts "github.com/docker/docker/pkg/mount"
2017-07-14 22:32:25 +00:00
"github.com/docker/docker/pkg/stringid"
2017-03-29 18:16:53 +00:00
"github.com/docker/docker/pkg/symlink"
2017-11-30 15:46:11 +00:00
"github.com/kubernetes-incubator/cri-o/lib"
"github.com/kubernetes-incubator/cri-o/lib/sandbox"
2016-11-22 16:49:54 +00:00
"github.com/kubernetes-incubator/cri-o/oci"
2017-06-01 16:40:33 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
2017-07-14 22:32:25 +00:00
"github.com/kubernetes-incubator/cri-o/pkg/storage"
2016-11-29 12:34:15 +00:00
"github.com/kubernetes-incubator/cri-o/server/apparmor"
2016-11-23 09:41:48 +00:00
"github.com/kubernetes-incubator/cri-o/server/seccomp"
2017-03-16 09:59:41 +00:00
"github.com/opencontainers/image-spec/specs-go/v1"
2017-07-07 21:43:35 +00:00
"github.com/opencontainers/runc/libcontainer/cgroups"
2017-05-08 22:10:09 +00:00
"github.com/opencontainers/runc/libcontainer/devices"
2017-03-29 18:18:35 +00:00
"github.com/opencontainers/runc/libcontainer/user"
2017-05-08 22:10:09 +00:00
rspec "github.com/opencontainers/runtime-spec/specs-go"
2016-11-22 16:49:54 +00:00
"github.com/opencontainers/runtime-tools/generate"
2017-03-22 17:58:35 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2017-08-05 11:40:46 +00:00
"github.com/sirupsen/logrus"
2016-11-22 16:49:54 +00:00
"golang.org/x/net/context"
2017-06-28 15:47:31 +00:00
"golang.org/x/sys/unix"
2017-08-04 11:13:19 +00:00
pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
2016-11-22 16:49:54 +00:00
)
2016-11-23 09:41:48 +00:00
// Seccomp profile selector strings. They are not referenced in the visible
// part of this file; presumably they are compared against the profile name
// requested for a container (confirm against the seccomp setup code).
const (
	seccompUnconfined      = "unconfined"
	seccompRuntimeDefault  = "runtime/default"
	seccompDockerDefault   = "docker/default"
	seccompLocalhostPrefix = "localhost/"
)

// Cgroup naming defaults used while building a container's cgroup path.
const (
	// scopePrefix is presumably the prefix of the per-container systemd
	// scope name (its use is outside the visible part of this file).
	scopePrefix = "crio"
	// defaultCgroupfsParent is the initial cgroup parent when the systemd
	// cgroup manager is not in use.
	defaultCgroupfsParent = "/crio"
	// defaultSystemdParent is the initial cgroup parent when the systemd
	// cgroup manager is in use.
	defaultSystemdParent = "system.slice"
)
2017-10-28 21:43:20 +00:00
// orderedMounts is a list of OCI mounts that provides the Len/Less/Swap
// methods needed for sorting. Entries are ordered by how many path separators
// their cleaned destination contains, i.e. shallower destinations sort first.
type orderedMounts []rspec.Mount

// Len reports how many mounts the list holds. Used in sorting.
func (om orderedMounts) Len() int {
	return len(om)
}

// Less reports whether the destination of mount i contains fewer path
// separators than the destination of mount j. Used in sorting.
func (om orderedMounts) Less(i, j int) bool {
	depthI, depthJ := om.parts(i), om.parts(j)
	return depthI < depthJ
}

// Swap exchanges the mounts at positions i and j. Used in sorting.
func (om orderedMounts) Swap(i, j int) {
	om[j], om[i] = om[i], om[j]
}

// parts counts the path separators in the cleaned destination of mount i
// (for example "/a/b/c" yields 3). Used in sorting.
func (om orderedMounts) parts(i int) int {
	cleaned := filepath.Clean(om[i].Destination)
	return strings.Count(cleaned, string(os.PathSeparator))
}
func addOCIBindMounts ( mountLabel string , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) ( [ ] oci . ContainerVolume , [ ] rspec . Mount , error ) {
2017-08-14 19:52:25 +00:00
volumes := [ ] oci . ContainerVolume { }
2017-10-28 21:43:20 +00:00
ociMounts := [ ] rspec . Mount { }
2017-02-22 18:42:44 +00:00
mounts := containerConfig . GetMounts ( )
for _ , mount := range mounts {
dest := mount . ContainerPath
if dest == "" {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Mount.ContainerPath is empty" )
2017-02-22 18:42:44 +00:00
}
src := mount . HostPath
if src == "" {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Mount.HostPath is empty" )
2017-02-22 18:42:44 +00:00
}
2017-06-01 14:09:39 +00:00
if _ , err := os . Stat ( src ) ; err != nil && os . IsNotExist ( err ) {
2017-06-16 22:49:16 +00:00
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "Failed to mkdir %s: %s" , src , err )
2017-06-16 22:49:16 +00:00
}
2017-06-01 14:09:39 +00:00
}
2017-09-18 13:53:48 +00:00
src , err := resolveSymbolicLink ( src )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "failed to resolve symlink %q: %v" , src , err )
2017-09-18 13:53:48 +00:00
}
2017-02-22 18:42:44 +00:00
options := [ ] string { "rw" }
if mount . Readonly {
options = [ ] string { "ro" }
}
2017-10-20 22:17:15 +00:00
options = append ( options , "rbind" )
2017-02-22 18:42:44 +00:00
2017-09-27 12:49:29 +00:00
// mount propagation
mountInfos , err := dockermounts . GetMounts ( )
if err != nil {
return nil , nil , err
}
switch mount . GetPropagation ( ) {
case pb . MountPropagation_PROPAGATION_PRIVATE :
options = append ( options , "rprivate" )
// Since default root propagation in runc is rprivate ignore
// setting the root propagation
case pb . MountPropagation_PROPAGATION_BIDIRECTIONAL :
if err := ensureShared ( src , mountInfos ) ; err != nil {
return nil , nil , err
}
options = append ( options , "rshared" )
specgen . SetLinuxRootPropagation ( "rshared" )
case pb . MountPropagation_PROPAGATION_HOST_TO_CONTAINER :
if err := ensureSharedOrSlave ( src , mountInfos ) ; err != nil {
return nil , nil , err
}
options = append ( options , "rslave" )
if specgen . Spec ( ) . Linux . RootfsPropagation != "rshared" &&
specgen . Spec ( ) . Linux . RootfsPropagation != "rslave" {
specgen . SetLinuxRootPropagation ( "rslave" )
}
default :
logrus . Warnf ( "Unknown propagation mode for hostPath %q" , mount . HostPath )
options = append ( options , "rprivate" )
}
2017-02-22 18:42:44 +00:00
if mount . SelinuxRelabel {
// Need a way in kubernetes to determine if the volume is shared or private
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( src , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
2017-10-28 21:43:20 +00:00
return nil , nil , fmt . Errorf ( "relabel failed %s: %v" , src , err )
2017-02-22 18:42:44 +00:00
}
}
2017-08-14 19:52:25 +00:00
volumes = append ( volumes , oci . ContainerVolume {
ContainerPath : dest ,
HostPath : src ,
Readonly : mount . Readonly ,
} )
2017-10-28 21:43:20 +00:00
ociMounts = append ( ociMounts , rspec . Mount {
Source : src ,
Destination : dest ,
Options : options ,
} )
2017-02-22 18:42:44 +00:00
}
2017-10-28 21:43:20 +00:00
return volumes , ociMounts , nil
2017-02-22 18:42:44 +00:00
}
2017-09-27 12:49:29 +00:00
// ensureShared checks that the mount point on which path lives is a shared
// mount, i.e. that one of the space-separated optional fields of its mount
// info starts with "shared:". It returns nil when that is the case and an
// error otherwise (including when the source mount cannot be determined).
func ensureShared(path string, mountInfos []*dockermounts.Info) error {
	mountPoint, optional, err := getSourceMount(path, mountInfos)
	if err != nil {
		return err
	}

	// Look for the shared-propagation tag among the optional fields.
	for _, field := range strings.Split(optional, " ") {
		if strings.HasPrefix(field, "shared:") {
			return nil
		}
	}

	return fmt.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountPoint)
}
// ensureSharedOrSlave checks that the mount point on which path lives is
// either a shared or a slave mount, i.e. that one of the space-separated
// optional fields of its mount info starts with "shared:" or "master:". It
// returns nil when that is the case and an error otherwise (including when
// the source mount cannot be determined).
func ensureSharedOrSlave(path string, mountInfos []*dockermounts.Info) error {
	mountPoint, optional, err := getSourceMount(path, mountInfos)
	if err != nil {
		return err
	}

	// Either propagation tag is good enough.
	for _, field := range strings.Split(optional, " ") {
		if strings.HasPrefix(field, "shared:") || strings.HasPrefix(field, "master:") {
			return nil
		}
	}

	return fmt.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountPoint)
}
// getMountInfo returns the first entry of mountInfos whose mount point is
// exactly dir, or nil when there is no such entry.
func getMountInfo(mountInfos []*dockermounts.Info, dir string) *dockermounts.Info {
	for idx := range mountInfos {
		if candidate := mountInfos[idx]; candidate.Mountpoint == dir {
			return candidate
		}
	}
	return nil
}
// getSourceMount finds the mount point that contains source. It checks source
// itself and then each parent directory in turn, returning the first path that
// appears in mountInfos together with that mount's optional fields.
//
// An error is returned when no enclosing mount point is found. The walk stops
// as soon as filepath.Dir no longer shortens the path, which happens at "/"
// for absolute paths and at "." for relative or empty ones; this guarantees
// termination for any input.
func getSourceMount(source string, mountInfos []*dockermounts.Info) (string, string, error) {
	path := source
	for {
		if info := getMountInfo(mountInfos, path); info != nil {
			return path, info.Optional, nil
		}

		parent := filepath.Dir(path)
		if parent == path {
			// Reached the top ("/" or ".") without a match.
			break
		}
		path = parent
	}

	// If we are here, we did not find parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}
2017-10-28 21:43:20 +00:00
func addImageVolumes ( rootfs string , s * Server , containerInfo * storage . ContainerInfo , specgen * generate . Generator , mountLabel string ) ( [ ] rspec . Mount , error ) {
mounts := [ ] rspec . Mount { }
2017-07-14 22:32:25 +00:00
for dest := range containerInfo . Config . Config . Volumes {
fp , err := symlink . FollowSymlinkInScope ( filepath . Join ( rootfs , dest ) , rootfs )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , err
2017-07-14 22:32:25 +00:00
}
switch s . config . ImageVolumes {
2017-11-30 15:46:11 +00:00
case lib . ImageVolumesMkdir :
2017-07-14 22:32:25 +00:00
if err1 := os . MkdirAll ( fp , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , err1
2017-07-14 22:32:25 +00:00
}
2017-11-30 15:46:11 +00:00
case lib . ImageVolumesBind :
2017-07-14 22:32:25 +00:00
volumeDirName := stringid . GenerateNonCryptoID ( )
src := filepath . Join ( containerInfo . RunDir , "mounts" , volumeDirName )
if err1 := os . MkdirAll ( src , 0644 ) ; err1 != nil {
2017-10-28 21:43:20 +00:00
return nil , err1
2017-07-14 22:32:25 +00:00
}
// Label the source with the sandbox selinux mount label
if mountLabel != "" {
if err1 := label . Relabel ( src , mountLabel , true ) ; err1 != nil && err1 != unix . ENOTSUP {
2017-10-28 21:43:20 +00:00
return nil , fmt . Errorf ( "relabel failed %s: %v" , src , err1 )
2017-07-14 22:32:25 +00:00
}
}
logrus . Debugf ( "Adding bind mounted volume: %s to %s" , src , dest )
2017-10-28 21:43:20 +00:00
mounts = append ( mounts , rspec . Mount {
Source : src ,
Destination : dest ,
Options : [ ] string { "rw" } ,
} )
2017-11-30 15:46:11 +00:00
case lib . ImageVolumesIgnore :
2017-07-14 22:32:25 +00:00
logrus . Debugf ( "Ignoring volume %v" , dest )
default :
logrus . Fatalf ( "Unrecognized image volumes setting" )
}
}
2017-10-28 21:43:20 +00:00
return mounts , nil
2017-07-14 22:32:25 +00:00
}
2017-09-06 15:04:18 +00:00
// resolveSymbolicLink resolves a possible symlink path. When path itself is a
// symbolic link the fully evaluated target is returned; when it is anything
// else the path is returned unchanged. An error is returned when path cannot
// be inspected or the link cannot be evaluated.
func resolveSymbolicLink(path string) (string, error) {
	fi, statErr := os.Lstat(path)
	switch {
	case statErr != nil:
		return "", statErr
	case fi.Mode()&os.ModeSymlink == 0:
		// Not a symlink: hand the path back untouched.
		return path, nil
	default:
		return filepath.EvalSymlinks(path)
	}
}
2017-07-19 19:03:22 +00:00
func addDevices ( sb * sandbox . Sandbox , containerConfig * pb . ContainerConfig , specgen * generate . Generator ) error {
2017-05-08 22:10:09 +00:00
sp := specgen . Spec ( )
2017-10-09 19:53:54 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetPrivileged ( ) {
2017-09-06 15:04:18 +00:00
hostDevices , err := devices . HostDevices ( )
if err != nil {
return err
}
for _ , hostDevice := range hostDevices {
rd := rspec . LinuxDevice {
Path : hostDevice . Path ,
Type : string ( hostDevice . Type ) ,
Major : hostDevice . Major ,
Minor : hostDevice . Minor ,
UID : & hostDevice . Uid ,
GID : & hostDevice . Gid ,
}
if hostDevice . Major == 0 && hostDevice . Minor == 0 {
// Invalid device, most likely a symbolic link, skip it.
continue
}
specgen . AddDevice ( rd )
}
sp . Linux . Resources . Devices = [ ] rspec . LinuxDeviceCgroup {
{
Allow : true ,
Access : "rwm" ,
} ,
}
return nil
}
2017-05-08 22:10:09 +00:00
for _ , device := range containerConfig . GetDevices ( ) {
2017-09-06 15:04:18 +00:00
path , err := resolveSymbolicLink ( device . HostPath )
2017-05-08 22:10:09 +00:00
if err != nil {
2017-09-06 15:04:18 +00:00
return err
2017-05-08 22:10:09 +00:00
}
2017-09-06 15:04:18 +00:00
dev , err := devices . DeviceFromPath ( path , device . Permissions )
// if there was no error, return the device
if err == nil {
rd := rspec . LinuxDevice {
Path : device . ContainerPath ,
Type : string ( dev . Type ) ,
Major : dev . Major ,
Minor : dev . Minor ,
UID : & dev . Uid ,
GID : & dev . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( dev . Type ) ,
Major : & dev . Major ,
Minor : & dev . Minor ,
Access : dev . Permissions ,
} )
continue
}
// if the device is not a device node
// try to see if it's a directory holding many devices
if err == devices . ErrNotADevice {
// check if it is a directory
if src , e := os . Stat ( path ) ; e == nil && src . IsDir ( ) {
// mount the internal devices recursively
filepath . Walk ( path , func ( dpath string , f os . FileInfo , e error ) error {
childDevice , e := devices . DeviceFromPath ( dpath , device . Permissions )
if e != nil {
// ignore the device
return nil
}
cPath := strings . Replace ( dpath , path , device . ContainerPath , 1 )
rd := rspec . LinuxDevice {
Path : cPath ,
Type : string ( childDevice . Type ) ,
Major : childDevice . Major ,
Minor : childDevice . Minor ,
UID : & childDevice . Uid ,
GID : & childDevice . Gid ,
}
specgen . AddDevice ( rd )
sp . Linux . Resources . Devices = append ( sp . Linux . Resources . Devices , rspec . LinuxDeviceCgroup {
Allow : true ,
Type : string ( childDevice . Type ) ,
Major : & childDevice . Major ,
Minor : & childDevice . Minor ,
Access : childDevice . Permissions ,
} )
return nil
} )
}
2017-05-08 22:10:09 +00:00
}
}
return nil
}
2017-03-16 09:59:41 +00:00
// buildOCIProcessArgs build an OCI compatible process arguments slice.
func buildOCIProcessArgs ( containerKubeConfig * pb . ContainerConfig , imageOCIConfig * v1 . Image ) ( [ ] string , error ) {
2017-06-07 16:38:04 +00:00
//# Start the nginx container using the default command, but use custom
//arguments (arg1 .. argN) for that command.
//kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN>
//# Start the nginx container using a different command and custom arguments.
//kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN>
2017-03-16 09:59:41 +00:00
kubeCommands := containerKubeConfig . Command
kubeArgs := containerKubeConfig . Args
2017-06-07 16:38:04 +00:00
// merge image config and kube config
// same as docker does today...
if imageOCIConfig != nil {
if len ( kubeCommands ) == 0 {
if len ( kubeArgs ) == 0 {
kubeArgs = imageOCIConfig . Config . Cmd
}
if kubeCommands == nil {
kubeCommands = imageOCIConfig . Config . Entrypoint
}
}
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
if len ( kubeCommands ) == 0 && len ( kubeArgs ) == 0 {
return nil , fmt . Errorf ( "no command specified" )
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
// create entrypoint and args
var entrypoint string
var args [ ] string
if len ( kubeCommands ) != 0 {
entrypoint = kubeCommands [ 0 ]
args = append ( kubeCommands [ 1 : ] , kubeArgs ... )
2017-03-16 09:59:41 +00:00
} else {
2017-06-07 16:38:04 +00:00
entrypoint = kubeArgs [ 0 ]
args = kubeArgs [ 1 : ]
2017-03-16 09:59:41 +00:00
}
2017-06-07 16:38:04 +00:00
processArgs := append ( [ ] string { entrypoint } , args ... )
2017-03-16 09:59:41 +00:00
logrus . Debugf ( "OCI process args %v" , processArgs )
return processArgs , nil
}
2017-08-12 10:29:22 +00:00
// addOCIHook look for hooks programs installed in hooksDirPath and add them to spec
2017-11-30 15:46:11 +00:00
func addOCIHook ( specgen * generate . Generator , hook lib . HookParams ) error {
2017-08-12 10:29:22 +00:00
logrus . Debugf ( "AddOCIHook" , hook )
for _ , stage := range hook . Stage {
2018-01-04 15:53:55 +00:00
h := rspec . Hook {
Path : hook . Hook ,
Args : append ( [ ] string { hook . Hook } , hook . Arguments ... ) ,
Env : [ ] string { fmt . Sprintf ( "stage=%s" , stage ) } ,
}
2017-08-12 10:29:22 +00:00
switch stage {
case "prestart" :
2018-01-04 15:53:55 +00:00
specgen . AddPreStartHook ( h )
2017-08-12 10:29:22 +00:00
case "poststart" :
2018-01-04 15:53:55 +00:00
specgen . AddPostStartHook ( h )
2017-08-12 10:29:22 +00:00
case "poststop" :
2018-01-04 15:53:55 +00:00
specgen . AddPostStopHook ( h )
2017-08-12 10:29:22 +00:00
}
}
return nil
}
2017-03-29 18:23:33 +00:00
// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config
func setupContainerUser ( specgen * generate . Generator , rootfs string , sc * pb . LinuxContainerSecurityContext , imageConfig * v1 . Image ) error {
if sc != nil {
containerUser := ""
// Case 1: run as user is set by kubelet
2017-09-13 11:06:05 +00:00
if sc . GetRunAsUser ( ) != nil {
2017-03-29 18:23:33 +00:00
containerUser = strconv . FormatInt ( sc . GetRunAsUser ( ) . Value , 10 )
} else {
// Case 2: run as username is set by kubelet
2017-09-13 11:06:05 +00:00
userName := sc . GetRunAsUsername ( )
2017-03-29 18:23:33 +00:00
if userName != "" {
containerUser = userName
} else {
// Case 3: get user from image config
2017-04-04 22:39:59 +00:00
if imageConfig != nil {
imageUser := imageConfig . Config . User
if imageUser != "" {
containerUser = imageUser
}
2017-03-29 18:23:33 +00:00
}
}
}
logrus . Debugf ( "CONTAINER USER: %+v" , containerUser )
// Add uid, gid and groups from user
uid , gid , addGroups , err1 := getUserInfo ( rootfs , containerUser )
if err1 != nil {
return err1
}
logrus . Debugf ( "UID: %v, GID: %v, Groups: %+v" , uid , gid , addGroups )
specgen . SetProcessUID ( uid )
specgen . SetProcessGID ( gid )
for _ , group := range addGroups {
specgen . AddProcessAdditionalGid ( group )
}
// Add groups from CRI
2017-09-13 11:06:05 +00:00
groups := sc . GetSupplementalGroups ( )
2017-03-29 18:23:33 +00:00
for _ , group := range groups {
specgen . AddProcessAdditionalGid ( uint32 ( group ) )
}
}
return nil
}
2018-01-19 17:49:28 +00:00
// setupCapabilities sets process.capabilities in the OCI runtime config.
//
// Each capability is added to, or dropped from, all five capability sets
// (ambient, bounding, effective, inheritable, permitted) in that order; the
// first failure aborts the whole operation. Nothing is done when capabilities
// is nil.
func setupCapabilities(specgen *generate.Generator, capabilities *pb.Capability) error {
	if capabilities == nil {
		return nil
	}

	// The per-set operations, in the order they must be applied.
	adders := []func(string) error{
		specgen.AddProcessCapabilityAmbient,
		specgen.AddProcessCapabilityBounding,
		specgen.AddProcessCapabilityEffective,
		specgen.AddProcessCapabilityInheritable,
		specgen.AddProcessCapabilityPermitted,
	}
	droppers := []func(string) error{
		specgen.DropProcessCapabilityAmbient,
		specgen.DropProcessCapabilityBounding,
		specgen.DropProcessCapabilityEffective,
		specgen.DropProcessCapabilityInheritable,
		specgen.DropProcessCapabilityPermitted,
	}

	// applyAll runs every operation on name and stops at the first error.
	applyAll := func(ops []func(string) error, name string) error {
		for _, op := range ops {
			if err := op(name); err != nil {
				return err
			}
		}
		return nil
	}

	// withCAPPrefix upper-cases name and prepends "CAP_" unless it already
	// carries that prefix (in any letter case), in which case it is returned
	// untouched.
	withCAPPrefix := func(name string) string {
		if strings.HasPrefix(strings.ToLower(name), "cap_") {
			return name
		}
		return "CAP_" + strings.ToUpper(name)
	}

	toAdd := capabilities.GetAddCapabilities()
	toDrop := capabilities.GetDropCapabilities()

	// Add/drop all capabilities if "all" is specified, so that
	// following individual add/drop could still work. E.g.
	// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
	// will be all capabilities without `CAP_CHOWN`.
	// see https://github.com/kubernetes/kubernetes/issues/51980
	if inStringSlice(toAdd, "ALL") {
		for _, c := range getOCICapabilitiesList() {
			if err := applyAll(adders, c); err != nil {
				return err
			}
		}
	}
	if inStringSlice(toDrop, "ALL") {
		for _, c := range getOCICapabilitiesList() {
			if err := applyAll(droppers, c); err != nil {
				return err
			}
		}
	}

	// Individual additions; "ALL" was already handled above.
	for _, name := range toAdd {
		if strings.ToUpper(name) == "ALL" {
			continue
		}
		if err := applyAll(adders, withCAPPrefix(name)); err != nil {
			return err
		}
	}

	// Individual drops; "ALL" was already handled above.
	for _, name := range toDrop {
		if strings.ToUpper(name) == "ALL" {
			continue
		}
		prefixed := withCAPPrefix(name)
		if err := applyAll(droppers, prefixed); err != nil {
			return fmt.Errorf("failed to drop cap %s %v", prefixed, err)
		}
	}

	return nil
}
2017-04-22 00:54:29 +00:00
// hostNetwork reports whether the namespace options in the security context
// of containerConfig have HostNetwork set. It returns false when the security
// context or its namespace options are missing.
func hostNetwork(containerConfig *pb.ContainerConfig) bool {
	nsOptions := containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions()
	if nsOptions == nil {
		return false
	}
	return nsOptions.HostNetwork
}
2017-04-04 14:11:53 +00:00
// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
// logPath to be a broken symlink to some magical Docker path. Ideally we
// wouldn't have to deal with this, but until that issue is fixed we have to
// remove the path if it's a broken symlink.
//
// Paths that do not exist, are not symlinks, or are symlinks whose target can
// be stat'ed are left alone. An error is returned only when removing a broken
// symlink fails.
func ensureSaneLogPath(logPath string) error {
	info, lstatErr := os.Lstat(logPath)
	if lstatErr != nil {
		// Non-existent (or otherwise unreadable) paths aren't our problem.
		return nil
	}
	if info.Mode()&os.ModeSymlink == 0 {
		// Neither are non-symlinks.
		return nil
	}

	// The link exists; it is only broken when following it reports that the
	// target does not exist.
	if _, statErr := os.Stat(logPath); !os.IsNotExist(statErr) {
		return nil
	}

	if rmErr := os.RemoveAll(logPath); rmErr != nil {
		return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", rmErr)
	}
	return nil
}
2017-09-22 15:10:15 +00:00
// addSecretsBindMounts mounts user defined secrets to the container
2017-10-28 21:43:20 +00:00
func addSecretsBindMounts ( mountLabel , ctrRunDir string , defaultMounts [ ] string , specgen generate . Generator ) ( [ ] rspec . Mount , error ) {
2017-10-12 18:14:42 +00:00
containerMounts := specgen . Spec ( ) . Mounts
mounts , err := secretMounts ( defaultMounts , mountLabel , ctrRunDir , containerMounts )
if err != nil {
2017-10-28 21:43:20 +00:00
return nil , err
2017-09-22 15:10:15 +00:00
}
2017-10-28 21:43:20 +00:00
return mounts , nil
2017-09-22 15:10:15 +00:00
}
2016-11-22 16:49:54 +00:00
// CreateContainer creates a new container in specified PodSandbox
func ( s * Server ) CreateContainer ( ctx context . Context , req * pb . CreateContainerRequest ) ( res * pb . CreateContainerResponse , err error ) {
2017-11-09 10:10:35 +00:00
const operation = "create_container"
defer func ( ) {
recordOperation ( operation , time . Now ( ) )
recordError ( operation , err )
} ( )
2016-11-22 16:49:54 +00:00
logrus . Debugf ( "CreateContainerRequest %+v" , req )
2017-04-04 15:24:55 +00:00
s . updateLock . RLock ( )
defer s . updateLock . RUnlock ( )
2017-02-03 14:41:28 +00:00
sbID := req . PodSandboxId
2016-11-22 16:49:54 +00:00
if sbID == "" {
return nil , fmt . Errorf ( "PodSandboxId should not be empty" )
}
2017-07-25 15:36:33 +00:00
sandboxID , err := s . PodIDIndex ( ) . Get ( sbID )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , fmt . Errorf ( "PodSandbox with ID starting with %s not found: %v" , sbID , err )
}
sb := s . getSandbox ( sandboxID )
if sb == nil {
return nil , fmt . Errorf ( "specified sandbox not found: %s" , sandboxID )
}
// The config of the container
containerConfig := req . GetConfig ( )
if containerConfig == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig is nil" )
}
2018-01-12 14:45:44 +00:00
if containerConfig . GetMetadata ( ) == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Metadata is nil" )
}
name := containerConfig . GetMetadata ( ) . GetName ( )
2016-11-22 16:49:54 +00:00
if name == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Name is empty" )
}
2017-07-19 19:03:22 +00:00
containerID , containerName , err := s . generateContainerIDandName ( sb . Metadata ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
defer func ( ) {
if err != nil {
2017-07-20 17:10:16 +00:00
s . ReleaseContainerName ( containerName )
2016-11-22 16:49:54 +00:00
}
} ( )
2016-10-18 14:48:33 +00:00
container , err := s . createSandboxContainer ( ctx , containerID , containerName , sb , req . GetSandboxConfig ( ) , containerConfig )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2016-10-18 14:48:33 +00:00
defer func ( ) {
if err != nil {
2017-07-31 18:38:45 +00:00
err2 := s . StorageRuntimeServer ( ) . DeleteContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err2 != nil {
logrus . Warnf ( "Failed to cleanup container directory: %v" , err2 )
}
}
} ( )
2016-11-22 16:49:54 +00:00
2017-07-19 19:03:22 +00:00
if err = s . Runtime ( ) . CreateContainer ( container , sb . CgroupParent ( ) ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
s . addContainer ( container )
2017-07-17 12:25:32 +00:00
if err = s . CtrIDIndex ( ) . Add ( containerID ) ; err != nil {
2016-11-22 16:49:54 +00:00
s . removeContainer ( container )
return nil , err
}
2017-07-20 17:05:12 +00:00
s . ContainerStateToDisk ( container )
2017-05-11 10:03:59 +00:00
2016-11-22 16:49:54 +00:00
resp := & pb . CreateContainerResponse {
2017-02-03 14:41:28 +00:00
ContainerId : containerID ,
2016-11-22 16:49:54 +00:00
}
logrus . Debugf ( "CreateContainerResponse: %+v" , resp )
return resp , nil
}
2017-08-12 10:29:22 +00:00
func ( s * Server ) setupOCIHooks ( specgen * generate . Generator , sb * sandbox . Sandbox , containerConfig * pb . ContainerConfig , command string ) error {
mounts := containerConfig . GetMounts ( )
addedHooks := map [ string ] struct { } { }
2017-11-30 15:46:11 +00:00
addHook := func ( hook lib . HookParams ) error {
2017-08-12 10:29:22 +00:00
// Only add a hook once
if _ , ok := addedHooks [ hook . Hook ] ; ! ok {
if err := addOCIHook ( specgen , hook ) ; err != nil {
return err
}
addedHooks [ hook . Hook ] = struct { } { }
}
return nil
}
for _ , hook := range s . Hooks ( ) {
logrus . Debugf ( "SetupOCIHooks" , hook )
if hook . HasBindMounts && len ( mounts ) > 0 {
if err := addHook ( hook ) ; err != nil {
return err
}
continue
}
for _ , cmd := range hook . Cmds {
match , err := regexp . MatchString ( cmd , command )
if err != nil {
logrus . Errorf ( "Invalid regex %q:%q" , cmd , err )
continue
}
if match {
if err := addHook ( hook ) ; err != nil {
return err
}
}
}
for _ , annotationRegex := range hook . Annotations {
for _ , annotation := range sb . Annotations ( ) {
match , err := regexp . MatchString ( annotationRegex , annotation )
if err != nil {
logrus . Errorf ( "Invalid regex %q:%q" , annotationRegex , err )
continue
}
if match {
if err := addHook ( hook ) ; err != nil {
return err
}
}
}
}
}
return nil
}
2017-07-19 19:03:22 +00:00
func ( s * Server ) createSandboxContainer ( ctx context . Context , containerID string , containerName string , sb * sandbox . Sandbox , SandboxConfig * pb . PodSandboxConfig , containerConfig * pb . ContainerConfig ) ( * oci . Container , error ) {
2016-11-22 16:49:54 +00:00
if sb == nil {
return nil , errors . New ( "createSandboxContainer needs a sandbox" )
}
2016-10-18 14:48:33 +00:00
2017-02-03 14:41:28 +00:00
// TODO: simplify this function (cyclomatic complexity here is high)
2016-10-18 14:48:33 +00:00
// TODO: factor generating/updating the spec into something other projects can vendor
2016-11-22 16:49:54 +00:00
// creates a spec Generator with the default spec.
specgen := generate . New ( )
2017-05-30 15:04:21 +00:00
specgen . HostSpecific = true
2017-07-03 21:49:34 +00:00
specgen . ClearProcessRlimits ( )
2016-11-22 16:49:54 +00:00
2017-09-20 13:19:58 +00:00
var readOnlyRootfs bool
var privileged bool
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil {
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
privileged = true
}
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . ReadonlyRootfs {
readOnlyRootfs = true
specgen . SetRootReadonly ( true )
}
}
2017-09-13 19:06:54 +00:00
mountLabel := sb . MountLabel ( )
processLabel := sb . ProcessLabel ( )
selinuxConfig := containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetSelinuxOptions ( )
if selinuxConfig != nil {
var err error
2017-09-20 13:19:58 +00:00
processLabel , mountLabel , err = getSELinuxLabels ( selinuxConfig , privileged )
2017-09-13 19:06:54 +00:00
if err != nil {
return nil , err
}
}
2017-10-28 21:43:20 +00:00
containerVolumes , ociMounts , err := addOCIBindMounts ( mountLabel , containerConfig , & specgen )
2017-08-14 19:52:25 +00:00
if err != nil {
return nil , err
}
volumesJSON , err := json . Marshal ( containerVolumes )
if err != nil {
2017-02-22 18:42:44 +00:00
return nil , err
2016-11-22 16:49:54 +00:00
}
2017-08-14 19:52:25 +00:00
specgen . AddAnnotation ( annotations . Volumes , string ( volumesJSON ) )
2016-11-22 16:49:54 +00:00
2018-01-04 15:53:55 +00:00
mnt := rspec . Mount {
Destination : "/sys/fs/cgroup" ,
Type : "cgroup" ,
Source : "cgroup" ,
Options : [ ] string { "nosuid" , "noexec" , "nodev" , "relatime" , "ro" } ,
}
2017-07-07 23:32:37 +00:00
// Add cgroup mount so container process can introspect its own limits
2018-01-04 15:53:55 +00:00
specgen . AddMount ( mnt )
2017-07-07 23:32:37 +00:00
2017-05-08 22:11:36 +00:00
if err := addDevices ( sb , containerConfig , & specgen ) ; err != nil {
return nil , err
}
2016-11-22 16:49:54 +00:00
labels := containerConfig . GetLabels ( )
2017-11-11 11:00:48 +00:00
if err := validateLabels ( labels ) ; err != nil {
return nil , err
}
2016-11-22 16:49:54 +00:00
metadata := containerConfig . GetMetadata ( )
2017-06-01 16:40:33 +00:00
kubeAnnotations := containerConfig . GetAnnotations ( )
if kubeAnnotations != nil {
for k , v := range kubeAnnotations {
2016-11-22 16:49:54 +00:00
specgen . AddAnnotation ( k , v )
}
}
2017-09-04 16:11:32 +00:00
if labels != nil {
for k , v := range labels {
specgen . AddAnnotation ( k , v )
}
}
2016-11-24 13:27:56 +00:00
// set this container's apparmor profile if it is set by sandbox
2017-09-06 11:25:19 +00:00
if s . appArmorEnabled && ! privileged {
2017-11-30 15:12:16 +00:00
appArmorProfileName := s . getAppArmorProfileName ( containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetApparmorProfile ( ) )
2016-11-29 12:34:15 +00:00
if appArmorProfileName != "" {
2016-12-12 07:55:17 +00:00
// reload default apparmor profile if it is unloaded.
if s . appArmorProfile == apparmor . DefaultApparmorProfile {
if err := apparmor . EnsureDefaultApparmorProfile ( ) ; err != nil {
return nil , err
}
}
2016-11-29 12:34:15 +00:00
specgen . SetProcessApparmorProfile ( appArmorProfileName )
}
2017-11-30 15:12:16 +00:00
2016-11-24 13:27:56 +00:00
}
2016-11-22 16:49:54 +00:00
2017-02-03 14:41:28 +00:00
logPath := containerConfig . LogPath
2016-10-07 15:59:39 +00:00
if logPath == "" {
// TODO: Should we use sandboxConfig.GetLogDirectory() here?
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , containerID + ".log" )
2016-10-07 15:59:39 +00:00
}
if ! filepath . IsAbs ( logPath ) {
// XXX: It's not really clear what this should be versus the sbox logDirectory.
logrus . Warnf ( "requested logPath for ctr id %s is a relative path: %s" , containerID , logPath )
2017-07-19 19:03:22 +00:00
logPath = filepath . Join ( sb . LogDir ( ) , logPath )
2016-10-07 15:59:39 +00:00
}
2017-04-04 14:11:53 +00:00
// Handle https://issues.k8s.io/44043
if err := ensureSaneLogPath ( logPath ) ; err != nil {
return nil , err
}
2016-10-07 15:59:39 +00:00
logrus . WithFields ( logrus . Fields {
2017-07-19 19:03:22 +00:00
"sbox.logdir" : sb . LogDir ( ) ,
2016-10-07 15:59:39 +00:00
"ctr.logfile" : containerConfig . LogPath ,
"log_path" : logPath ,
} ) . Debugf ( "setting container's log_path" )
2017-02-03 14:41:28 +00:00
specgen . SetProcessTerminal ( containerConfig . Tty )
2017-09-06 14:38:01 +00:00
if containerConfig . Tty {
specgen . AddProcessEnv ( "TERM" , "xterm" )
}
2016-11-22 16:49:54 +00:00
linux := containerConfig . GetLinux ( )
if linux != nil {
resources := linux . GetResources ( )
if resources != nil {
2017-12-20 17:23:37 +00:00
specgen . SetLinuxResourcesCPUPeriod ( uint64 ( resources . GetCpuPeriod ( ) ) )
specgen . SetLinuxResourcesCPUQuota ( resources . GetCpuQuota ( ) )
specgen . SetLinuxResourcesCPUShares ( uint64 ( resources . GetCpuShares ( ) ) )
specgen . SetLinuxResourcesMemoryLimit ( resources . GetMemoryLimitInBytes ( ) )
specgen . SetProcessOOMScoreAdj ( int ( resources . GetOomScoreAdj ( ) ) )
specgen . SetLinuxResourcesCPUCpus ( resources . GetCpusetCpus ( ) )
specgen . SetLinuxResourcesCPUMems ( resources . GetCpusetMems ( ) )
2016-11-22 16:49:54 +00:00
}
2017-08-29 15:52:05 +00:00
var cgPath string
2017-08-29 21:11:30 +00:00
parent := defaultCgroupfsParent
useSystemd := s . config . CgroupManager == oci . SystemdCgroupsManager
2017-08-29 15:52:05 +00:00
if useSystemd {
2017-08-29 21:11:30 +00:00
parent = defaultSystemdParent
2017-08-29 15:52:05 +00:00
}
2017-07-19 19:03:22 +00:00
if sb . CgroupParent ( ) != "" {
2017-08-29 15:52:05 +00:00
parent = sb . CgroupParent ( )
}
if useSystemd {
cgPath = parent + ":" + scopePrefix + ":" + containerID
} else {
cgPath = filepath . Join ( parent , scopePrefix + "-" + containerID )
2016-12-13 08:34:55 +00:00
}
2017-08-29 15:52:05 +00:00
specgen . SetLinuxCgroupsPath ( cgPath )
2016-12-13 08:34:55 +00:00
2017-09-06 11:25:19 +00:00
if privileged {
specgen . SetupPrivileged ( true )
2017-09-20 13:19:58 +00:00
setOCIBindMountsPrivileged ( & specgen )
2017-09-06 11:25:19 +00:00
} else {
2018-01-19 17:49:28 +00:00
err = setupCapabilities ( & specgen , linux . GetSecurityContext ( ) . GetCapabilities ( ) )
if err != nil {
return nil , err
2016-11-22 16:49:54 +00:00
}
}
2017-09-20 13:19:58 +00:00
specgen . SetProcessSelinuxLabel ( processLabel )
specgen . SetLinuxMountLabel ( mountLabel )
2017-09-27 12:34:33 +00:00
specgen . SetProcessNoNewPrivileges ( linux . GetSecurityContext ( ) . GetNoNewPrivs ( ) )
2016-11-22 16:49:54 +00:00
2017-05-31 00:03:54 +00:00
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) != nil &&
2017-06-16 22:49:16 +00:00
! containerConfig . GetLinux ( ) . GetSecurityContext ( ) . Privileged {
2017-05-31 00:03:54 +00:00
for _ , mp := range [ ] string {
"/proc/kcore" ,
"/proc/latency_stats" ,
"/proc/timer_list" ,
"/proc/timer_stats" ,
"/proc/sched_debug" ,
2017-11-22 11:25:43 +00:00
"/proc/scsi" ,
2017-05-31 00:03:54 +00:00
"/sys/firmware" ,
} {
specgen . AddLinuxMaskedPaths ( mp )
}
2017-05-12 10:47:40 +00:00
2017-05-31 00:03:54 +00:00
for _ , rp := range [ ] string {
"/proc/asound" ,
"/proc/bus" ,
"/proc/fs" ,
"/proc/irq" ,
"/proc/sys" ,
"/proc/sysrq-trigger" ,
} {
specgen . AddLinuxReadonlyPaths ( rp )
}
2017-05-12 10:47:40 +00:00
}
2016-11-22 16:49:54 +00:00
}
// Join the namespace paths for the pod sandbox container.
2017-07-19 19:03:22 +00:00
podInfraState := s . Runtime ( ) . ContainerStatus ( sb . InfraContainer ( ) )
2016-11-22 16:49:54 +00:00
logrus . Debugf ( "pod container state %+v" , podInfraState )
2016-11-23 17:16:21 +00:00
ipcNsPath := fmt . Sprintf ( "/proc/%d/ns/ipc" , podInfraState . Pid )
2017-10-19 19:12:55 +00:00
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . IPCNamespace ) , ipcNsPath ) ; err != nil {
2016-11-23 17:16:21 +00:00
return nil , err
}
2017-10-19 19:12:55 +00:00
utsNsPath := fmt . Sprintf ( "/proc/%d/ns/uts" , podInfraState . Pid )
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . UTSNamespace ) , utsNsPath ) ; err != nil {
return nil , err
}
if containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetNamespaceOptions ( ) . GetHostPid ( ) {
2017-11-10 03:24:34 +00:00
// The Kubernetes PodSpec requests the host PID namespace
2017-10-19 19:12:55 +00:00
specgen . RemoveLinuxNamespace ( string ( rspec . PIDNamespace ) )
2017-11-18 00:52:06 +00:00
} else if s . config . EnableSharedPIDNamespace {
2017-11-10 03:24:34 +00:00
// share Pod PID namespace
pidNsPath := fmt . Sprintf ( "/proc/%d/ns/pid" , podInfraState . Pid )
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . PIDNamespace ) , pidNsPath ) ; err != nil {
return nil , err
}
2017-10-19 19:12:55 +00:00
}
2017-07-18 20:35:15 +00:00
netNsPath := sb . NetNsPath ( )
2016-11-23 17:16:21 +00:00
if netNsPath == "" {
// The sandbox does not have a permanent namespace,
// it's on the host one.
netNsPath = fmt . Sprintf ( "/proc/%d/ns/net" , podInfraState . Pid )
}
2017-10-19 19:12:55 +00:00
if err := specgen . AddOrReplaceLinuxNamespace ( string ( rspec . NetworkNamespace ) , netNsPath ) ; err != nil {
2016-11-23 17:16:21 +00:00
return nil , err
2016-11-22 16:49:54 +00:00
}
2016-12-12 10:12:03 +00:00
imageSpec := containerConfig . GetImage ( )
if imageSpec == nil {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image is nil" )
}
2017-02-03 14:41:28 +00:00
image := imageSpec . Image
2016-12-12 10:12:03 +00:00
if image == "" {
return nil , fmt . Errorf ( "CreateContainerRequest.ContainerConfig.Image.Image is empty" )
}
2017-07-20 08:01:23 +00:00
images , err := s . StorageImageServer ( ) . ResolveNames ( image )
if err != nil {
2017-11-28 00:34:55 +00:00
if err == storage . ErrCannotParseImageID {
2017-07-20 08:01:23 +00:00
images = append ( images , image )
} else {
return nil , err
}
}
2016-12-12 10:12:03 +00:00
Return image references from the storage package
The image's canonical reference is a name with a digest of the image's
manifest, so in imageService.ImageStatus() and
imageService.ListImages(), divide the image's name list into tagged and
digested values, and if we have names, add canonical versions.
In Server.ContainerStatus(), return the image name as it was given to us
as the image, and the image digested reference as the image reference.
In Server.ListImages(), be sure to only return tagged names in the
RepoTags field. In Server.ImageStatus(), also return canonical
references in the RepoDigests field.
In Server.PullImage(), be sure that we consistently return the same
image reference for an image, whether we ended up pulling it or not.
Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-07-12 16:41:38 +00:00
// Get imageName and imageRef that are later requested in container status
status , err := s . StorageImageServer ( ) . ImageStatus ( s . ImageContext ( ) , images [ 0 ] )
2017-08-14 19:29:53 +00:00
if err != nil {
return nil , err
}
Return image references from the storage package
The image's canonical reference is a name with a digest of the image's
manifest, so in imageService.ImageStatus() and
imageService.ListImages(), divide the image's name list into tagged and
digested values, and if we have names, add canonical versions.
In Server.ContainerStatus(), return the image name as it was given to us
as the image, and the image digested reference as the image reference.
In Server.ListImages(), be sure to only return tagged names in the
RepoTags field. In Server.ImageStatus(), also return canonical
references in the RepoDigests field.
In Server.PullImage(), be sure that we consistently return the same
image reference for an image, whether we ended up pulling it or not.
Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-07-12 16:41:38 +00:00
imageName := status . Name
2017-08-14 19:29:53 +00:00
imageRef := status . ID
Return image references from the storage package
The image's canonical reference is a name with a digest of the image's
manifest, so in imageService.ImageStatus() and
imageService.ListImages(), divide the image's name list into tagged and
digested values, and if we have names, add canonical versions.
In Server.ContainerStatus(), return the image name as it was given to us
as the image, and the image digested reference as the image reference.
In Server.ListImages(), be sure to only return tagged names in the
RepoTags field. In Server.ImageStatus(), also return canonical
references in the RepoDigests field.
In Server.PullImage(), be sure that we consistently return the same
image reference for an image, whether we ended up pulling it or not.
Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-07-12 16:41:38 +00:00
if len ( status . RepoDigests ) > 0 {
imageRef = status . RepoDigests [ 0 ]
2017-08-14 19:29:53 +00:00
}
Return image references from the storage package
The image's canonical reference is a name with a digest of the image's
manifest, so in imageService.ImageStatus() and
imageService.ListImages(), divide the image's name list into tagged and
digested values, and if we have names, add canonical versions.
In Server.ContainerStatus(), return the image name as it was given to us
as the image, and the image digested reference as the image reference.
In Server.ListImages(), be sure to only return tagged names in the
RepoTags field. In Server.ImageStatus(), also return canonical
references in the RepoDigests field.
In Server.PullImage(), be sure that we consistently return the same
image reference for an image, whether we ended up pulling it or not.
Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-07-12 16:41:38 +00:00
specgen . AddAnnotation ( annotations . Image , image )
2017-08-14 19:29:53 +00:00
specgen . AddAnnotation ( annotations . ImageName , imageName )
specgen . AddAnnotation ( annotations . ImageRef , imageRef )
2017-08-29 23:00:49 +00:00
specgen . AddAnnotation ( annotations . IP , sb . IP ( ) )
2017-08-14 19:29:53 +00:00
2018-01-04 15:53:55 +00:00
mnt = rspec . Mount {
Type : "bind" ,
Source : sb . ShmPath ( ) ,
Destination : "/etc/shm" ,
Options : [ ] string { "rw" , "bind" } ,
}
2016-12-08 23:32:17 +00:00
// bind mount the pod shm
2018-01-04 15:53:55 +00:00
specgen . AddMount ( mnt )
2016-12-08 23:32:17 +00:00
2017-06-14 13:28:13 +00:00
options := [ ] string { "rw" }
if readOnlyRootfs {
options = [ ] string { "ro" }
}
2017-07-19 19:03:22 +00:00
if sb . ResolvPath ( ) != "" {
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( sb . ResolvPath ( ) , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2017-09-13 08:28:41 +00:00
2018-01-04 15:53:55 +00:00
mnt = rspec . Mount {
Type : "bind" ,
Source : sb . ResolvPath ( ) ,
Destination : "/etc/resolv.conf" ,
Options : append ( options , "bind" ) ,
}
2017-03-24 14:32:16 +00:00
// bind mount the pod resolver file
2018-01-04 15:53:55 +00:00
specgen . AddMount ( mnt )
2017-03-24 14:32:16 +00:00
}
2017-09-13 08:28:41 +00:00
if sb . HostnamePath ( ) != "" {
2017-09-13 19:06:54 +00:00
if err := label . Relabel ( sb . HostnamePath ( ) , mountLabel , true ) ; err != nil && err != unix . ENOTSUP {
return nil , err
}
2017-09-13 08:28:41 +00:00
2018-01-04 15:53:55 +00:00
mnt = rspec . Mount {
Type : "bind" ,
Source : sb . HostnamePath ( ) ,
Destination : "/etc/hostname" ,
Options : append ( options , "bind" ) ,
}
specgen . AddMount ( mnt )
2017-09-13 08:28:41 +00:00
}
2018-01-24 12:29:28 +00:00
isInCRIMounts := func ( dst string , mounts [ ] * pb . Mount ) bool {
for _ , m := range mounts {
if m . ContainerPath == dst {
return true
}
}
return false
}
if ! isInCRIMounts ( "/etc/hosts" , containerConfig . GetMounts ( ) ) && hostNetwork ( containerConfig ) {
// Only bind mount for host netns and when CRI does not give us any hosts file
2018-01-04 15:53:55 +00:00
mnt = rspec . Mount {
Type : "bind" ,
Source : "/etc/hosts" ,
Destination : "/etc/hosts" ,
Options : append ( options , "bind" ) ,
}
specgen . AddMount ( mnt )
2017-04-22 00:54:29 +00:00
}
2017-11-02 17:20:45 +00:00
// Set hostname and add env for hostname
2017-09-13 08:28:41 +00:00
specgen . SetHostname ( sb . Hostname ( ) )
2017-11-02 17:20:45 +00:00
specgen . AddProcessEnv ( "HOSTNAME" , sb . Hostname ( ) )
2017-03-29 23:11:57 +00:00
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Name , containerName )
2017-06-23 16:31:13 +00:00
specgen . AddAnnotation ( annotations . ContainerID , containerID )
2017-07-19 19:03:22 +00:00
specgen . AddAnnotation ( annotations . SandboxID , sb . ID ( ) )
specgen . AddAnnotation ( annotations . SandboxName , sb . InfraContainer ( ) . Name ( ) )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . ContainerType , annotations . ContainerTypeContainer )
specgen . AddAnnotation ( annotations . LogPath , logPath )
specgen . AddAnnotation ( annotations . TTY , fmt . Sprintf ( "%v" , containerConfig . Tty ) )
2017-06-08 20:08:29 +00:00
specgen . AddAnnotation ( annotations . Stdin , fmt . Sprintf ( "%v" , containerConfig . Stdin ) )
specgen . AddAnnotation ( annotations . StdinOnce , fmt . Sprintf ( "%v" , containerConfig . StdinOnce ) )
2017-09-22 23:44:02 +00:00
specgen . AddAnnotation ( annotations . ResolvPath , sb . InfraContainer ( ) . CrioAnnotations ( ) [ annotations . ResolvPath ] )
2016-11-22 16:49:54 +00:00
2017-05-11 09:22:47 +00:00
created := time . Now ( )
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Created , created . Format ( time . RFC3339Nano ) )
2017-05-11 09:22:47 +00:00
2016-11-22 16:49:54 +00:00
metadataJSON , err := json . Marshal ( metadata )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Metadata , string ( metadataJSON ) )
2016-11-22 16:49:54 +00:00
labelsJSON , err := json . Marshal ( labels )
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Labels , string ( labelsJSON ) )
2016-11-22 16:49:54 +00:00
2017-06-01 16:40:33 +00:00
kubeAnnotationsJSON , err := json . Marshal ( kubeAnnotations )
2016-12-12 17:55:34 +00:00
if err != nil {
return nil , err
}
2017-06-01 16:40:33 +00:00
specgen . AddAnnotation ( annotations . Annotations , string ( kubeAnnotationsJSON ) )
2016-12-12 17:55:34 +00:00
2017-09-27 18:46:31 +00:00
spp := containerConfig . GetLinux ( ) . GetSecurityContext ( ) . GetSeccompProfilePath ( )
2017-09-06 11:25:19 +00:00
if ! privileged {
2017-09-27 18:46:31 +00:00
if err = s . setupSeccomp ( & specgen , spp ) ; err != nil {
2017-09-06 11:25:19 +00:00
return nil , err
}
2016-11-23 09:41:48 +00:00
}
2017-09-27 18:46:31 +00:00
specgen . AddAnnotation ( annotations . SeccompProfilePath , spp )
2016-11-23 09:41:48 +00:00
2017-09-27 18:46:31 +00:00
metaname := metadata . Name
2017-02-03 14:41:28 +00:00
attempt := metadata . Attempt
2017-07-31 18:38:45 +00:00
containerInfo , err := s . StorageRuntimeServer ( ) . CreateContainer ( s . ImageContext ( ) ,
2017-07-19 19:03:22 +00:00
sb . Name ( ) , sb . ID ( ) ,
Return image references from the storage package
The image's canonical reference is a name with a digest of the image's
manifest, so in imageService.ImageStatus() and
imageService.ListImages(), divide the image's name list into tagged and
digested values, and if we have names, add canonical versions.
In Server.ContainerStatus(), return the image name as it was given to us
as the image, and the image digested reference as the image reference.
In Server.ListImages(), be sure to only return tagged names in the
RepoTags field. In Server.ImageStatus(), also return canonical
references in the RepoDigests field.
In Server.PullImage(), be sure that we consistently return the same
image reference for an image, whether we ended up pulling it or not.
Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-07-12 16:41:38 +00:00
image , status . ID ,
2016-10-18 14:48:33 +00:00
containerName , containerID ,
metaname ,
attempt ,
2017-09-13 19:06:54 +00:00
mountLabel ,
2016-10-18 14:48:33 +00:00
nil )
if err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-12-01 16:04:51 +00:00
defer func ( ) {
if err != nil {
err2 := s . StorageRuntimeServer ( ) . DeleteContainer ( containerInfo . ID )
if err2 != nil {
logrus . Warnf ( "Failed to cleanup container directory: %v" , err2 )
}
}
} ( )
2016-11-22 16:49:54 +00:00
2017-07-31 18:38:45 +00:00
mountPoint , err := s . StorageRuntimeServer ( ) . StartContainer ( containerID )
2016-10-18 14:48:33 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to mount container %s(%s): %v" , containerName , containerID , err )
}
2017-08-31 13:16:25 +00:00
specgen . AddAnnotation ( annotations . MountPoint , mountPoint )
2016-10-18 14:48:33 +00:00
2017-04-04 22:39:59 +00:00
containerImageConfig := containerInfo . Config
2017-06-07 16:38:04 +00:00
if containerImageConfig == nil {
2017-12-01 16:04:51 +00:00
err = fmt . Errorf ( "empty image config for %s" , image )
return nil , err
2017-06-07 16:38:04 +00:00
}
2017-04-04 22:39:59 +00:00
2017-05-26 16:31:28 +00:00
if containerImageConfig . Config . StopSignal != "" {
// this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
specgen . AddAnnotation ( "org.opencontainers.image.stopSignal" , containerImageConfig . Config . StopSignal )
}
2017-07-14 22:32:25 +00:00
// Add image volumes
2017-10-28 21:43:20 +00:00
volumeMounts , err := addImageVolumes ( mountPoint , s , & containerInfo , & specgen , mountLabel )
if err != nil {
2017-07-14 22:32:25 +00:00
return nil , err
2017-05-22 15:19:49 +00:00
}
2017-04-04 22:39:59 +00:00
processArgs , err := buildOCIProcessArgs ( containerConfig , containerImageConfig )
2017-03-16 09:59:41 +00:00
if err != nil {
return nil , err
2016-10-18 14:48:33 +00:00
}
specgen . SetProcessArgs ( processArgs )
2017-11-30 10:52:30 +00:00
envs := mergeEnvs ( containerImageConfig , containerConfig . GetEnvs ( ) )
for _ , e := range envs {
parts := strings . SplitN ( e , "=" , 2 )
specgen . AddProcessEnv ( parts [ 0 ] , parts [ 1 ] )
2017-03-27 22:53:47 +00:00
}
// Set working directory
// Pick it up from image config first and override if specified in CRI
2017-03-31 21:04:16 +00:00
containerCwd := "/"
2017-04-04 22:39:59 +00:00
if containerImageConfig != nil {
imageCwd := containerImageConfig . Config . WorkingDir
if imageCwd != "" {
containerCwd = imageCwd
}
2017-03-31 21:04:16 +00:00
}
runtimeCwd := containerConfig . WorkingDir
if runtimeCwd != "" {
containerCwd = runtimeCwd
2017-03-27 22:53:47 +00:00
}
2017-03-31 21:04:16 +00:00
specgen . SetProcessCwd ( containerCwd )
2017-11-03 17:59:52 +00:00
if err := setupWorkingDirectory ( mountPoint , mountLabel , containerCwd ) ; err != nil {
if err1 := s . StorageRuntimeServer ( ) . StopContainer ( containerID ) ; err1 != nil {
return nil , fmt . Errorf ( "can't umount container after cwd error %v: %v" , err , err1 )
}
return nil , err
}
2017-03-27 22:53:47 +00:00
2017-10-28 21:43:20 +00:00
var secretMounts [ ] rspec . Mount
if len ( s . config . DefaultMounts ) > 0 {
var err error
secretMounts , err = addSecretsBindMounts ( mountLabel , containerInfo . RunDir , s . config . DefaultMounts , specgen )
if err != nil {
return nil , fmt . Errorf ( "failed to mount secrets: %v" , err )
}
}
mounts := [ ] rspec . Mount { }
mounts = append ( mounts , ociMounts ... )
mounts = append ( mounts , volumeMounts ... )
mounts = append ( mounts , secretMounts ... )
sort . Sort ( orderedMounts ( mounts ) )
for _ , m := range mounts {
2018-01-04 15:53:55 +00:00
mnt = rspec . Mount {
Type : "bind" ,
Source : m . Source ,
Destination : m . Destination ,
Options : append ( m . Options , "bind" ) ,
}
specgen . AddMount ( mnt )
2017-10-28 21:43:20 +00:00
}
2017-08-12 10:29:22 +00:00
if err := s . setupOCIHooks ( & specgen , sb , containerConfig , processArgs [ 0 ] ) ; err != nil {
return nil , err
}
2017-03-29 18:23:33 +00:00
// Setup user and groups
if linux != nil {
2017-04-04 22:39:59 +00:00
if err = setupContainerUser ( & specgen , mountPoint , linux . GetSecurityContext ( ) , containerImageConfig ) ; err != nil {
2017-03-29 18:23:33 +00:00
return nil , err
}
}
2017-07-07 21:43:35 +00:00
// Set up pids limit if pids cgroup is mounted
_ , err = cgroups . FindCgroupMountpoint ( "pids" )
if err == nil {
specgen . SetLinuxResourcesPidsLimit ( s . config . PidsLimit )
}
2016-10-18 14:48:33 +00:00
// by default, the root path is an empty string. set it now.
specgen . SetRootPath ( mountPoint )
saveOptions := generate . ExportOptions { }
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . Dir , "config.json" ) , saveOptions ) ; err != nil {
return nil , err
}
if err = specgen . SaveToFile ( filepath . Join ( containerInfo . RunDir , "config.json" ) , saveOptions ) ; err != nil {
2016-11-22 16:49:54 +00:00
return nil , err
}
2017-09-22 23:44:02 +00:00
crioAnnotations := specgen . Spec ( ) . Annotations
container , err := oci . NewContainer ( containerID , containerName , containerInfo . RunDir , logPath , sb . NetNs ( ) , labels , crioAnnotations , kubeAnnotations , image , imageName , imageRef , metadata , sb . ID ( ) , containerConfig . Tty , containerConfig . Stdin , containerConfig . StdinOnce , sb . Privileged ( ) , sb . Trusted ( ) , containerInfo . Dir , created , containerImageConfig . Config . StopSignal )
2016-11-22 16:49:54 +00:00
if err != nil {
return nil , err
}
2017-10-19 13:02:56 +00:00
container . SetSpec ( specgen . Spec ( ) )
2017-08-31 13:16:25 +00:00
container . SetMountPoint ( mountPoint )
2017-11-12 16:51:32 +00:00
container . SetSeccompProfilePath ( spp )
2016-11-22 16:49:54 +00:00
2017-08-14 19:52:25 +00:00
for _ , cv := range containerVolumes {
container . AddVolume ( cv )
}
2016-11-22 16:49:54 +00:00
return container , nil
}
2017-09-27 18:46:31 +00:00
func ( s * Server ) setupSeccomp ( specgen * generate . Generator , profile string ) error {
if profile == "" {
// running w/o seccomp, aka unconfined
specgen . Spec ( ) . Linux . Seccomp = nil
return nil
2016-11-23 09:41:48 +00:00
}
if ! s . seccompEnabled {
if profile != seccompUnconfined {
return fmt . Errorf ( "seccomp is not enabled in your kernel, cannot run with a profile" )
}
logrus . Warn ( "seccomp is not enabled in your kernel, running container without profile" )
}
if profile == seccompUnconfined {
// running w/o seccomp, aka unconfined
specgen . Spec ( ) . Linux . Seccomp = nil
return nil
}
2017-09-18 10:50:39 +00:00
if profile == seccompRuntimeDefault || profile == seccompDockerDefault {
2016-11-23 09:41:48 +00:00
return seccomp . LoadProfileFromStruct ( s . seccompProfile , specgen )
}
if ! strings . HasPrefix ( profile , seccompLocalhostPrefix ) {
return fmt . Errorf ( "unknown seccomp profile option: %q" , profile )
}
2017-09-27 18:46:31 +00:00
fname := strings . TrimPrefix ( profile , "localhost/" )
file , err := ioutil . ReadFile ( filepath . FromSlash ( fname ) )
if err != nil {
return fmt . Errorf ( "cannot load seccomp profile %q: %v" , fname , err )
}
return seccomp . LoadProfileFromBytes ( file , specgen )
2016-11-23 09:41:48 +00:00
}
2016-11-30 08:19:36 +00:00
// getAppArmorProfileName gets the profile name for the given container.
2017-11-30 15:12:16 +00:00
func ( s * Server ) getAppArmorProfileName ( profile string ) string {
2016-11-30 08:19:36 +00:00
if profile == "" {
return ""
}
if profile == apparmor . ProfileRuntimeDefault {
// If the value is runtime/default, then return default profile.
return s . appArmorProfile
}
2016-12-02 07:13:41 +00:00
return strings . TrimPrefix ( profile , apparmor . ProfileNamePrefix )
2016-11-30 08:19:36 +00:00
}
2017-03-29 18:16:53 +00:00
// openContainerFile opens path inside the container rootfs for reading.
// The path is joined onto rootfs and resolved with
// symlink.FollowSymlinkInScope, using rootfs as the scope, before being
// opened with os.Open.
//
// On any failure the returned reader is a literal nil interface, so callers
// can reliably test it with `== nil`.
func openContainerFile(rootfs string, path string) (io.ReadCloser, error) {
	resolved, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, path), rootfs)
	if err != nil {
		return nil, err
	}
	f, err := os.Open(resolved)
	if err != nil {
		// Do not forward os.Open's results directly: its nil *os.File would
		// be wrapped in a non-nil io.ReadCloser, defeating nil checks in
		// callers.
		return nil, err
	}
	return f, nil
}
2017-03-29 18:18:35 +00:00
// getUserInfo returns UID, GID and additional groups for specified user
// by looking them up in /etc/passwd and /etc/group
func getUserInfo ( rootfs string , userName string ) ( uint32 , uint32 , [ ] uint32 , error ) {
// We don't care if we can't open the file because
// not all images will have these files
passwdFile , err := openContainerFile ( rootfs , "/etc/passwd" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/passwd: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer passwdFile . Close ( )
}
2017-03-29 18:23:33 +00:00
2017-03-29 18:18:35 +00:00
groupFile , err := openContainerFile ( rootfs , "/etc/group" )
2017-03-29 18:23:33 +00:00
if err != nil {
logrus . Warnf ( "Failed to open /etc/group: %v" , err )
} else {
2017-03-29 18:18:35 +00:00
defer groupFile . Close ( )
}
execUser , err := user . GetExecUser ( userName , nil , passwdFile , groupFile )
if err != nil {
return 0 , 0 , nil , err
}
uid := uint32 ( execUser . Uid )
gid := uint32 ( execUser . Gid )
var additionalGids [ ] uint32
for _ , g := range execUser . Sgids {
additionalGids = append ( additionalGids , uint32 ( g ) )
}
return uid , gid , additionalGids , nil
}
2017-09-20 13:19:58 +00:00
// setOCIBindMountsPrivileged relaxes the generated spec for a privileged
// container: it removes the "ro" option from the /sys mount (only when the
// root filesystem is not read-only) and from every mount of type "cgroup",
// then drops all read-only and masked paths from the Linux section.
func setOCIBindMountsPrivileged(g *generate.Generator) {
	spec := g.Spec()

	for idx := range spec.Mounts {
		mount := &spec.Mounts[idx]
		// /sys stays read-only when the whole rootfs is read-only.
		if mount.Destination == "/sys" && !spec.Root.Readonly {
			clearReadOnly(mount)
		}
		if mount.Type == "cgroup" {
			clearReadOnly(mount)
		}
	}

	spec.Linux.MaskedPaths = nil
	spec.Linux.ReadonlyPaths = nil
}
// clearReadOnly replaces m.Options with a new slice holding every option
// except "ro", in the original order. If no option remains, m.Options
// becomes nil.
func clearReadOnly(m *rspec.Mount) {
	var kept []string
	for idx := range m.Options {
		if m.Options[idx] == "ro" {
			continue
		}
		kept = append(kept, m.Options[idx])
	}
	m.Options = kept
}
2017-11-03 17:59:52 +00:00
// setupWorkingDirectory makes sure containerCwd exists inside rootfs.
// The path is resolved with symlink.FollowSymlinkInScope using rootfs as the
// scope, created with mode 0755 (including missing parents), and, when
// mountLabel is non-empty, relabeled with it. A unix.ENOTSUP result from the
// relabel is ignored; any other relabel failure is returned as an error.
func setupWorkingDirectory(rootfs, mountLabel, containerCwd string) error {
	cwdPath, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, containerCwd), rootfs)
	if err != nil {
		return err
	}

	if err = os.MkdirAll(cwdPath, 0755); err != nil {
		return err
	}

	// Nothing to relabel without a mount label.
	if mountLabel == "" {
		return nil
	}

	err = label.Relabel(cwdPath, mountLabel, true)
	if err == nil || err == unix.ENOTSUP {
		return nil
	}
	return fmt.Errorf("relabel failed %s: %v", cwdPath, err)
}