Vendor in files for runc/libcontainer
vndr was previously removing .go files in runc/libcontainer since they weren't used anywhere in cri-o, but kpod stats will use them.

Signed-off-by: Ryan Cole <rcyoalne@gmail.com>
This commit is contained in:
parent
efc29e3dd2
commit
65eec38754
47 changed files with 7982 additions and 0 deletions
114
vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
generated
vendored
Normal file
114
vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,114 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
// allCapabilityTypes is the union of the capability.CAPS, capability.BOUNDS
// and capability.AMBS selectors; ApplyCaps clears and applies all of them.
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
||||
|
||||
// capabilityMap maps "CAP_"-prefixed, upper-cased capability names to their
// capability.Cap values. It is populated by init.
var capabilityMap map[string]capability.Cap
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap)
|
||||
last := capability.CAP_LAST_CAP
|
||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
||||
if last == capability.Cap(63) {
|
||||
last = capability.CAP_BLOCK_SUSPEND
|
||||
}
|
||||
for _, cap := range capability.List() {
|
||||
if cap > last {
|
||||
continue
|
||||
}
|
||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
||||
capabilityMap[capKey] = cap
|
||||
}
|
||||
}
|
||||
|
||||
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
|
||||
bounding := []capability.Cap{}
|
||||
for _, c := range capConfig.Bounding {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
bounding = append(bounding, v)
|
||||
}
|
||||
effective := []capability.Cap{}
|
||||
for _, c := range capConfig.Effective {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
effective = append(effective, v)
|
||||
}
|
||||
inheritable := []capability.Cap{}
|
||||
for _, c := range capConfig.Inheritable {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
inheritable = append(inheritable, v)
|
||||
}
|
||||
permitted := []capability.Cap{}
|
||||
for _, c := range capConfig.Permitted {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
permitted = append(permitted, v)
|
||||
}
|
||||
ambient := []capability.Cap{}
|
||||
for _, c := range capConfig.Ambient {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
ambient = append(ambient, v)
|
||||
}
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &containerCapabilities{
|
||||
bounding: bounding,
|
||||
effective: effective,
|
||||
inheritable: inheritable,
|
||||
permitted: permitted,
|
||||
ambient: ambient,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// containerCapabilities holds the capability sets resolved by
// newContainerCapList together with the handle used to apply them.
type containerCapabilities struct {
	// pid is the handle obtained from capability.NewPid for the current process.
	pid capability.Capabilities
	// The slices below hold the resolved capabilities for each set.
	bounding    []capability.Cap
	effective   []capability.Cap
	inheritable []capability.Cap
	permitted   []capability.Cap
	ambient     []capability.Cap
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *containerCapabilities) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDS)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
return c.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// Apply sets all the capabilities for the current process in the config.
|
||||
func (c *containerCapabilities) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
128
vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
generated
vendored
Normal file
128
vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
generated
vendored
Normal file
|
@ -0,0 +1,128 @@
|
|||
// +build linux
|
||||
|
||||
package rootless
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
)
|
||||
|
||||
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.

// subsystems lists the cgroup subsystems whose paths Manager.Apply tries to
// resolve for the current process.
var subsystems = []subsystem{
	&fs.CpusetGroup{},
	&fs.DevicesGroup{},
	&fs.MemoryGroup{},
	&fs.CpuGroup{},
	&fs.CpuacctGroup{},
	&fs.PidsGroup{},
	&fs.BlkioGroup{},
	&fs.HugetlbGroup{},
	&fs.NetClsGroup{},
	&fs.NetPrioGroup{},
	&fs.PerfEventGroup{},
	&fs.FreezerGroup{},
	&fs.NameGroup{GroupName: "name=systemd"},
}
|
||||
|
||||
// subsystem is the part of a cgroup subsystem's behaviour this package
// relies on; every entry of subsystems implements it.
type subsystem interface {
	// Name returns the name of the subsystem.
	Name() string

	// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
	GetStats(path string, stats *cgroups.Stats) error
}
|
||||
|
||||
// Manager is the noop cgroup manager used for rootless containers, because we
// currently cannot manage cgroups if we are in a rootless setup. This manager
// is chosen by factory if we are in rootless mode. We error out if any cgroup
// options are set in the config -- this may change in the future with upcoming
// kernel features like the cgroup namespace.
type Manager struct {
	// Cgroups is the cgroup configuration; Apply does nothing when it is nil.
	Cgroups *configs.Cgroup
	// Paths maps a subsystem name to the cgroup path recorded by Apply.
	Paths map[string]string
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
// If there are no cgroup settings, there's nothing to do.
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can't set paths.
|
||||
// TODO(cyphar): Implement the case where the runner of a rootless container
|
||||
// owns their own cgroup, which would allow us to set up a
|
||||
// cgroup for each path.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return fmt.Errorf("cannot change cgroup path in rootless container")
|
||||
}
|
||||
|
||||
// We load the paths into the manager.
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
|
||||
path, err := cgroups.GetOwnCgroupPath(name)
|
||||
if err != nil {
|
||||
// Ignore paths we couldn't resolve.
|
||||
continue
|
||||
}
|
||||
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetPaths returns the subsystem-name-to-path map stored in m.Paths. The map
// itself is returned, not a copy.
func (m *Manager) GetPaths() map[string]string {
	return m.Paths
}
|
||||
|
||||
// Set applies no cgroup settings; it only re-runs configuration validation on
// container and returns the validator's result.
func (m *Manager) Set(container *configs.Config) error {
	// We have to re-do the validation here, since someone might decide to
	// update a rootless container.
	return validate.New().Validate(container)
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
// GetStats is not supported by the rootless manager and always returns an
// error.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	// TODO(cyphar): We can make this work if we figure out a way to allow usage
	//               of cgroups with a rootless container. While this doesn't
	//               actually require write access to a cgroup directory, the
	//               statistics are not useful if they can be affected by
	//               non-container processes.
	return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
|
||||
|
||||
// Freeze is not supported by the rootless manager and always returns an
// error, whatever state is requested.
func (m *Manager) Freeze(state configs.FreezerState) error {
	// TODO(cyphar): We can make this work if we figure out a way to allow usage
	//               of cgroups with a rootless container.
	return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
|
||||
|
||||
// Destroy is a no-op that always returns nil.
func (m *Manager) Destroy() error {
	// We don't have to do anything here because we didn't do any setup.
	return nil
}
|
10
vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go
generated
vendored
Normal file
10
vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
// +build linux,!go1.5
|
||||
|
||||
package libcontainer
|
||||
|
||||
import "syscall"
|
||||
|
||||
// enableSetgroups is a no-op in this build (linux, pre-go1.5 per the file's
// build constraint): GidMappingsEnableSetgroups was added in Go 1.5, so do
// nothing when building with earlier versions.
func enableSetgroups(sys *syscall.SysProcAttr) {
}
|
117
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
Normal file
117
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
Normal file
|
@ -0,0 +1,117 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// geteuid and getegid are package-level indirections over os.Geteuid and
// os.Getegid, used by rootlessMappings.
// NOTE(review): presumably variables so tests can substitute them — confirm.
var (
	geteuid = os.Geteuid
	getegid = os.Getegid
)
|
||||
|
||||
func (v *ConfigValidator) rootless(config *configs.Config) error {
|
||||
if err := rootlessMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rootlessMount(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// Currently, cgroups cannot effectively be used in rootless containers.
|
||||
// The new cgroup namespace doesn't really help us either because it doesn't
|
||||
// have nice interactions with the user namespace (we're working with upstream
|
||||
// to fix this).
|
||||
if err := rootlessCgroup(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX: We currently can't verify the user config at all, because
|
||||
// configs.Config doesn't store the user-related configs. So this
|
||||
// has to be verified by setupUser() in init_linux.go.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func rootlessMappings(config *configs.Config) error {
|
||||
rootuid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
|
||||
}
|
||||
if euid := geteuid(); euid != 0 {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless containers require user namespaces")
|
||||
}
|
||||
if rootuid != euid {
|
||||
return fmt.Errorf("rootless containers cannot map container root to a different host user")
|
||||
}
|
||||
}
|
||||
|
||||
rootgid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
|
||||
}
|
||||
|
||||
// Similar to the above test, we need to make sure that we aren't trying to
|
||||
// map to a group ID that we don't have the right to be.
|
||||
if rootgid != getegid() {
|
||||
return fmt.Errorf("rootless containers cannot map container root to a different host group")
|
||||
}
|
||||
|
||||
// We can only map one user and group inside a container (our own).
|
||||
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
|
||||
return fmt.Errorf("rootless containers cannot map more than one user")
|
||||
}
|
||||
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
|
||||
return fmt.Errorf("rootless containers cannot map more than one group")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
|
||||
func rootlessCgroup(config *configs.Config) error {
|
||||
// Nothing set at all.
|
||||
if config.Cgroups == nil || config.Cgroups.Resources == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for comparing to the zero value.
|
||||
left := reflect.ValueOf(*config.Cgroups.Resources)
|
||||
right := reflect.Zero(left.Type())
|
||||
|
||||
// This is all we need to do, since specconv won't add cgroup options in
|
||||
// rootless mode.
|
||||
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
|
||||
return fmt.Errorf("cannot specify resource limits in rootless container")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount verifies that the user isn't trying to set up any mounts they don't have
|
||||
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
|
||||
// `gid=` option that doesn't resolve to root.
|
||||
func rootlessMount(config *configs.Config) error {
|
||||
// XXX: We could whitelist allowed devices at this point, but I'm not
|
||||
// convinced that's a good idea. The kernel is the best arbiter of
|
||||
// access control.
|
||||
|
||||
for _, mount := range config.Mounts {
|
||||
// Check that the options list doesn't contain any uid= or gid= entries
|
||||
// that don't resolve to root.
|
||||
for _, opt := range strings.Split(mount.Data, ",") {
|
||||
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
|
||||
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
|
||||
}
|
||||
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
|
||||
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
195
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
Normal file
195
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
Normal file
|
@ -0,0 +1,195 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
)
|
||||
|
||||
// Validator checks a container configuration.
type Validator interface {
	// Validate returns a non-nil error when the configuration is rejected.
	Validate(*configs.Config) error
}
|
||||
|
||||
// New returns a Validator backed by a ConfigValidator.
func New() Validator {
	return &ConfigValidator{}
}
|
||||
|
||||
// ConfigValidator is the stateless Validator implementation returned by New.
type ConfigValidator struct {
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootless {
|
||||
if err := v.rootless(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates if the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
if _, err := os.Stat(config.Rootfs); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
|
||||
}
|
||||
return err
|
||||
}
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if filepath.Clean(config.Rootfs) != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
if config.ProcessLabel != "" && !selinux.GetEnabled() {
|
||||
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sysctl validates that the specified sysctl keys are valid or not.
|
||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||
// are specified, a subset of sysctls are permitted.
|
||||
func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
validSysctlMap := map[string]bool{
|
||||
"kernel.msgmax": true,
|
||||
"kernel.msgmnb": true,
|
||||
"kernel.msgmni": true,
|
||||
"kernel.sem": true,
|
||||
"kernel.shmall": true,
|
||||
"kernel.shmmax": true,
|
||||
"kernel.shmmni": true,
|
||||
"kernel.shm_rmid_forced": true,
|
||||
}
|
||||
|
||||
for s := range config.Sysctl {
|
||||
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
|
||||
if config.Namespaces.Contains(configs.NEWIPC) {
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(s, "net.") {
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
|
||||
if err := checkHostNs(s, path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isSymbolicLink reports whether path itself (not its target) is a symbolic
// link. Any error from os.Lstat is returned with a false result.
func isSymbolicLink(path string) (bool, error) {
	info, err := os.Lstat(path)
	if err != nil {
		return false, err
	}

	isLink := info.Mode()&os.ModeSymlink != 0
	return isLink, nil
}
|
||||
|
||||
// checkHostNs checks whether network sysctl is used in host namespace.
|
||||
func checkHostNs(sysctlConfig string, path string) error {
|
||||
var currentProcessNetns = "/proc/self/ns/net"
|
||||
// readlink on the current processes network namespace
|
||||
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", currentProcessNetns)
|
||||
}
|
||||
|
||||
// First check if the provided path is a symbolic link
|
||||
symLink, err := isSymbolicLink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
|
||||
}
|
||||
|
||||
if symLink == false {
|
||||
// The provided namespace is not a symbolic link,
|
||||
// it is not the host namespace.
|
||||
return nil
|
||||
}
|
||||
|
||||
// readlink on the path provided in the struct
|
||||
destOfContainer, err := os.Readlink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", path)
|
||||
}
|
||||
if destOfContainer == destOfCurrentProcess {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
|
||||
}
|
||||
return nil
|
||||
}
|
17
vendor/github.com/opencontainers/runc/libcontainer/console.go
generated
vendored
Normal file
17
vendor/github.com/opencontainers/runc/libcontainer/console.go
generated
vendored
Normal file
|
@ -0,0 +1,17 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Console represents a pseudo TTY.
type Console interface {
	io.ReadWriteCloser

	// Path returns the filesystem path to the slave side of the pty.
	Path() string

	// File returns the *os.File for the master of the pty.
	File() *os.File
}
|
13
vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go
generated
vendored
Normal file
13
vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go
generated
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
// +build freebsd
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// newConsole is the FreeBSD stub: it creates no console and always returns a
// nil Console with an error stating that the libcontainer console is not
// supported on FreeBSD.
func newConsole() (Console, error) {
	return nil, errors.New("libcontainer console is not supported on FreeBSD")
}
|
152
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
Normal file
152
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,152 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func ConsoleFromFile(f *os.File) Console {
|
||||
return &linuxConsole{
|
||||
master: f,
|
||||
}
|
||||
}
|
||||
|
||||
// newConsole returns an initialized console that can be used within a container by copying bytes
|
||||
// from the master side to the slave that is attached as the tty for the container's init process.
|
||||
func newConsole() (Console, error) {
|
||||
master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
console, err := ptsname(master)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := unlockpt(master); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &linuxConsole{
|
||||
slavePath: console,
|
||||
master: master,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
	// master is the master side of the pty; it may be nil.
	master *os.File
	// slavePath is the filesystem path of the slave side.
	slavePath string
}

// File returns the master side of the pty.
func (c *linuxConsole) File() *os.File {
	return c.master
}

// Path returns the filesystem path of the slave side of the pty.
func (c *linuxConsole) Path() string {
	return c.slavePath
}

// Read reads from the master side of the pty.
func (c *linuxConsole) Read(b []byte) (int, error) {
	return c.master.Read(b)
}

// Write writes to the master side of the pty.
func (c *linuxConsole) Write(b []byte) (int, error) {
	return c.master.Write(b)
}

// Close closes the master side of the pty, if there is one.
func (c *linuxConsole) Close() error {
	if c.master == nil {
		return nil
	}
	return c.master.Close()
}
|
||||
|
||||
// mount initializes the console inside the rootfs mounting with the specified mount label
|
||||
// and applying the correct ownership of the console.
|
||||
func (c *linuxConsole) mount() error {
|
||||
oldMask := unix.Umask(0000)
|
||||
defer unix.Umask(oldMask)
|
||||
f, err := os.Create("/dev/console")
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "")
|
||||
}
|
||||
|
||||
// dupStdio opens the slavePath for the console and dups the fds to the current
|
||||
// processes stdio, fd 0,1,2.
|
||||
func (c *linuxConsole) dupStdio() error {
|
||||
slave, err := c.open(unix.O_RDWR)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd := int(slave.Fd())
|
||||
for _, i := range []int{0, 1, 2} {
|
||||
if err := unix.Dup3(fd, i, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
|
||||
func (c *linuxConsole) open(flag int) (*os.File, error) {
|
||||
r, e := unix.Open(c.slavePath, flag, 0)
|
||||
if e != nil {
|
||||
return nil, &os.PathError{
|
||||
Op: "open",
|
||||
Path: c.slavePath,
|
||||
Err: e,
|
||||
}
|
||||
}
|
||||
return os.NewFile(uintptr(r), c.slavePath), nil
|
||||
}
|
||||
|
||||
func ioctl(fd uintptr, flag, data uintptr) error {
|
||||
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
	// u stays zero; its address is handed to the TIOCSPTLCK ioctl.
	var u int32
	return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
|
||||
|
||||
// ptsname retrieves the name of the first available pts for the given master.
|
||||
func ptsname(f *os.File) (string, error) {
|
||||
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return fmt.Sprintf("/dev/pts/%d", n), nil
|
||||
}
|
||||
|
||||
// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
|
||||
// created by us acts normally. In particular, a not-very-well-known default of
|
||||
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
|
||||
// problem for terminal emulators, because we relay data from the terminal we
|
||||
// also relay that funky line discipline.
|
||||
func SaneTerminal(terminal *os.File) error {
|
||||
termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
|
||||
}
|
||||
|
||||
// Set -onlcr so we don't have to deal with \r.
|
||||
termios.Oflag &^= unix.ONLCR
|
||||
|
||||
if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil {
|
||||
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
11
vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go
generated
vendored
Normal file
11
vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go
generated
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// newConsole is the Solaris stub: it creates no console and always returns a
// nil Console with an error stating that the libcontainer console is not
// supported on Solaris.
func newConsole() (Console, error) {
	return nil, errors.New("libcontainer console is not supported on Solaris")
}
|
30
vendor/github.com/opencontainers/runc/libcontainer/console_windows.go
generated
vendored
Normal file
30
vendor/github.com/opencontainers/runc/libcontainer/console_windows.go
generated
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
package libcontainer

import "os"
|
||||
|
||||
// newConsole returns an initialized console that can be used within a container.
// On Windows this is an empty windowsConsole and the error is always nil.
func newConsole() (Console, error) {
	return &windowsConsole{}, nil
}
|
||||
|
||||
// windowsConsole is a Windows pseudo TTY for use within a container.
// Every method is a stub that returns a zero value.
type windowsConsole struct {
}

// File returns nil: the stub has no backing file. The method exists so that
// *windowsConsole satisfies the Console interface, which requires
// File() *os.File.
func (c *windowsConsole) File() *os.File {
	return nil
}

// Fd always returns 0. It is kept for existing callers.
func (c *windowsConsole) Fd() uintptr {
	return 0
}

// Path always returns the empty string.
func (c *windowsConsole) Path() string {
	return ""
}

// Read reads nothing and reports 0 bytes with a nil error.
func (c *windowsConsole) Read(b []byte) (int, error) {
	return 0, nil
}

// Write discards b and reports 0 bytes written with a nil error.
func (c *windowsConsole) Write(b []byte) (int, error) {
	return 0, nil
}

// Close does nothing and returns nil.
func (c *windowsConsole) Close() error {
	return nil
}
|
166
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
Normal file
166
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
Normal file
|
@ -0,0 +1,166 @@
|
|||
// Package libcontainer provides a native Go implementation for creating containers
|
||||
// with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
// It allows you to manage the lifecycle of the container performing additional operations
|
||||
// after the container is created.
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// Status is the status of a container.
type Status int

const (
	// Created is the status that denotes the container exists but has not been run yet.
	Created Status = iota
	// Running is the status that denotes the container exists and is running.
	Running
	// Pausing is the status that denotes the container exists, it is in the process of being paused.
	Pausing
	// Paused is the status that denotes the container exists, but all its processes are paused.
	Paused
	// Stopped is the status that denotes the container does not have a created or running process.
	Stopped
)

// String returns the lower-case name of the status, or "unknown" for any
// value that is not one of the declared constants.
func (s Status) String() string {
	names := [...]string{
		Created: "created",
		Running: "running",
		Pausing: "pausing",
		Paused:  "paused",
		Stopped: "stopped",
	}
	if s < 0 || int(s) >= len(names) {
		return "unknown"
	}
	return names[s]
}
|
||||
|
||||
// BaseState represents the platform agnostic pieces relating to a
// running container's state. Its fields carry JSON tags for serialization.
type BaseState struct {
	// ID is the container ID.
	ID string `json:"id"`

	// InitProcessPid is the init process id in the parent namespace.
	InitProcessPid int `json:"init_process_pid"`

	// InitProcessStartTime is the init process start time in clock cycles since boot time.
	InitProcessStartTime uint64 `json:"init_process_start"`

	// Created is the creation time of the container, stored as a time.Time.
	// NOTE(review): originally documented as being in UTC — confirm at the
	// site that sets it.
	Created time.Time `json:"created"`

	// Config is the container's configuration.
	Config configs.Config `json:"config"`
}
|
||||
|
||||
// BaseContainer is a libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found. BaseContainer includes methods that are platform agnostic.
|
||||
type BaseContainer interface {
|
||||
// Returns the ID of the container
|
||||
ID() string
|
||||
|
||||
// Returns the current status of the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Status() (Status, error)
|
||||
|
||||
// State returns the current container's state information.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
State() (*State, error)
|
||||
|
||||
// Returns the current config of the container.
|
||||
Config() configs.Config
|
||||
|
||||
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// SystemError - System error.
|
||||
//
|
||||
// Some of the returned PIDs may no longer refer to processes in the Container, unless
|
||||
// the Container state is PAUSED in which case every PID in the slice is valid.
|
||||
Processes() ([]int, error)
|
||||
|
||||
// Returns statistics for the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// SystemError - System error.
|
||||
Stats() (*Stats, error)
|
||||
|
||||
// Set resources of container as configured
|
||||
//
|
||||
// We can use this to change resources when containers are running.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Set(config configs.Config) error
|
||||
|
||||
// Start a process inside the container. Returns error if process fails to
|
||||
// start. You can track process lifecycle with passed Process structure.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Start(process *Process) (err error)
|
||||
|
||||
// Run immediately starts the process inside the container. Returns error if process
|
||||
// fails to start. It does not block waiting for the exec fifo after start returns but
|
||||
// opens the fifo after start returns.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Run(process *Process) (err error)
|
||||
|
||||
// Destroys the container, if it's in a valid state, after killing any
|
||||
// remaining running processes.
|
||||
//
|
||||
// Any event registrations are removed before the container is destroyed.
|
||||
// No error is returned if the container is already destroyed.
|
||||
//
|
||||
// Running containers must first be stopped using Signal(..).
|
||||
// Paused containers must first be resumed using Resume(..).
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotStopped - Container is still running,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Destroy() error
|
||||
|
||||
// Signal sends the provided signal code to the container's initial process.
|
||||
//
|
||||
// If all is specified the signal is sent to all processes in the container
|
||||
// including the initial process.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Signal(s os.Signal, all bool) error
|
||||
|
||||
// Exec signals the container to exec the users process at the end of the init.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Exec() error
|
||||
}
|
1574
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
Normal file
1574
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
20
vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go
generated
vendored
Normal file
20
vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
package libcontainer
|
||||
|
||||
// State represents a running container's state
|
||||
type State struct {
|
||||
BaseState
|
||||
|
||||
// Platform specific fields below here
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found.
|
||||
type Container interface {
|
||||
BaseContainer
|
||||
|
||||
// Methods below here are platform specific
|
||||
|
||||
}
|
20
vendor/github.com/opencontainers/runc/libcontainer/container_windows.go
generated
vendored
Normal file
20
vendor/github.com/opencontainers/runc/libcontainer/container_windows.go
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
package libcontainer
|
||||
|
||||
// State represents a running container's state
|
||||
type State struct {
|
||||
BaseState
|
||||
|
||||
// Platform specific fields below here
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found.
|
||||
type Container interface {
|
||||
BaseContainer
|
||||
|
||||
// Methods below here are platform specific
|
||||
|
||||
}
|
37
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
Normal file
37
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,37 @@
|
|||
package libcontainer
|
||||
|
||||
// cgroup restoring strategy provided by criu
|
||||
type cgMode uint32
|
||||
|
||||
const (
|
||||
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
|
||||
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
|
||||
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
|
||||
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
|
||||
)
|
||||
|
||||
type CriuPageServerInfo struct {
|
||||
Address string // IP address of CRIU page server
|
||||
Port int32 // port number of CRIU page server
|
||||
}
|
||||
|
||||
type VethPairName struct {
|
||||
ContainerInterfaceName string
|
||||
HostInterfaceName string
|
||||
}
|
||||
|
||||
// CriuOpts holds the options for checkpointing or restoring a container with CRIU.
type CriuOpts struct {
|
||||
ImagesDirectory string // directory for storing image files
|
||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||
ParentImage string // directory for storing parent image files in pre-dump and dump
|
||||
LeaveRunning bool // leave container in running state after checkpoint
|
||||
TcpEstablished bool // checkpoint/restore established TCP connections
|
||||
ExternalUnixConnections bool // allow external unix connections
|
||||
ShellJob bool // allow to dump and restore shell jobs
|
||||
FileLocks bool // handle file locks, for safety
|
||||
PreDump bool // call criu predump to perform iterative checkpoint
|
||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||
VethPairs []VethPairName // pass the veth to criu when restore
|
||||
ManageCgroupsMode cgMode // dump or restore cgroup mode
|
||||
EmptyNs uint32 // don't c/r properties for namespace from this mask
|
||||
}
|
6
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go
generated
vendored
Normal file
6
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go
generated
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
package libcontainer
|
||||
|
||||
// TODO Windows: This can ultimately be entirely factored out as criu is
|
||||
// a Unix concept not relevant on Windows.
|
||||
type CriuOpts struct {
|
||||
}
|
1069
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
generated
vendored
Normal file
1069
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
195
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.proto
generated
vendored
Normal file
195
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.proto
generated
vendored
Normal file
|
@ -0,0 +1,195 @@
|
|||
syntax = "proto2";
|
||||
|
||||
message criu_page_server_info {
|
||||
optional string address = 1;
|
||||
optional int32 port = 2;
|
||||
optional int32 pid = 3;
|
||||
optional int32 fd = 4;
|
||||
}
|
||||
|
||||
message criu_veth_pair {
|
||||
required string if_in = 1;
|
||||
required string if_out = 2;
|
||||
};
|
||||
|
||||
message ext_mount_map {
|
||||
required string key = 1;
|
||||
required string val = 2;
|
||||
};
|
||||
|
||||
message join_namespace {
|
||||
required string ns = 1;
|
||||
required string ns_file = 2;
|
||||
optional string extra_opt = 3;
|
||||
}
|
||||
|
||||
message inherit_fd {
|
||||
required string key = 1;
|
||||
required int32 fd = 2;
|
||||
};
|
||||
|
||||
message cgroup_root {
|
||||
optional string ctrl = 1;
|
||||
required string path = 2;
|
||||
};
|
||||
|
||||
message unix_sk {
|
||||
required uint32 inode = 1;
|
||||
};
|
||||
|
||||
enum criu_cg_mode {
|
||||
IGNORE = 0;
|
||||
CG_NONE = 1;
|
||||
PROPS = 2;
|
||||
SOFT = 3;
|
||||
FULL = 4;
|
||||
STRICT = 5;
|
||||
DEFAULT = 6;
|
||||
};
|
||||
|
||||
message criu_opts {
|
||||
required int32 images_dir_fd = 1;
|
||||
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
|
||||
|
||||
optional bool leave_running = 3;
|
||||
optional bool ext_unix_sk = 4;
|
||||
optional bool tcp_established = 5;
|
||||
optional bool evasive_devices = 6;
|
||||
optional bool shell_job = 7;
|
||||
optional bool file_locks = 8;
|
||||
optional int32 log_level = 9 [default = 2];
|
||||
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
|
||||
|
||||
optional criu_page_server_info ps = 11;
|
||||
|
||||
optional bool notify_scripts = 12;
|
||||
|
||||
optional string root = 13;
|
||||
optional string parent_img = 14;
|
||||
optional bool track_mem = 15;
|
||||
optional bool auto_dedup = 16;
|
||||
|
||||
optional int32 work_dir_fd = 17;
|
||||
optional bool link_remap = 18;
|
||||
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
|
||||
|
||||
optional uint32 cpu_cap = 20 [default = 0xffffffff];
|
||||
optional bool force_irmap = 21;
|
||||
repeated string exec_cmd = 22;
|
||||
|
||||
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
|
||||
optional bool manage_cgroups = 24; /* backward compatibility */
|
||||
repeated cgroup_root cg_root = 25;
|
||||
|
||||
optional bool rst_sibling = 26; /* swrk only */
|
||||
repeated inherit_fd inherit_fd = 27; /* swrk only */
|
||||
|
||||
optional bool auto_ext_mnt = 28;
|
||||
optional bool ext_sharing = 29;
|
||||
optional bool ext_masters = 30;
|
||||
|
||||
repeated string skip_mnt = 31;
|
||||
repeated string enable_fs = 32;
|
||||
|
||||
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
|
||||
|
||||
optional criu_cg_mode manage_cgroups_mode = 34;
|
||||
optional uint32 ghost_limit = 35 [default = 0x100000];
|
||||
repeated string irmap_scan_paths = 36;
|
||||
repeated string external = 37;
|
||||
optional uint32 empty_ns = 38;
|
||||
repeated join_namespace join_ns = 39;
|
||||
|
||||
optional string cgroup_props = 41;
|
||||
optional string cgroup_props_file = 42;
|
||||
repeated string cgroup_dump_controller = 43;
|
||||
|
||||
optional string freeze_cgroup = 44;
|
||||
optional uint32 timeout = 45;
|
||||
optional bool tcp_skip_in_flight = 46;
|
||||
optional bool weak_sysctls = 47;
|
||||
optional bool lazy_pages = 48;
|
||||
optional int32 status_fd = 49;
|
||||
optional bool orphan_pts_master = 50;
|
||||
}
|
||||
|
||||
message criu_dump_resp {
|
||||
optional bool restored = 1;
|
||||
}
|
||||
|
||||
message criu_restore_resp {
|
||||
required int32 pid = 1;
|
||||
}
|
||||
|
||||
message criu_notify {
|
||||
optional string script = 1;
|
||||
optional int32 pid = 2;
|
||||
}
|
||||
|
||||
enum criu_req_type {
|
||||
EMPTY = 0;
|
||||
DUMP = 1;
|
||||
RESTORE = 2;
|
||||
CHECK = 3;
|
||||
PRE_DUMP = 4;
|
||||
PAGE_SERVER = 5;
|
||||
|
||||
NOTIFY = 6;
|
||||
|
||||
CPUINFO_DUMP = 7;
|
||||
CPUINFO_CHECK = 8;
|
||||
|
||||
FEATURE_CHECK = 9;
|
||||
}
|
||||
|
||||
/*
|
||||
* List of features which can be queried via
|
||||
* CRIU_REQ_TYPE__FEATURE_CHECK
|
||||
*/
|
||||
message criu_features {
|
||||
optional bool mem_track = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Request -- each type corresponds to must-be-there
|
||||
* request arguments of respective type
|
||||
*/
|
||||
|
||||
message criu_req {
|
||||
required criu_req_type type = 1;
|
||||
|
||||
optional criu_opts opts = 2;
|
||||
optional bool notify_success = 3;
|
||||
|
||||
/*
|
||||
* When set service won't close the connection but
|
||||
* will wait for more req-s to appear. Works not
|
||||
* for all request types.
|
||||
*/
|
||||
optional bool keep_open = 4;
|
||||
/*
|
||||
* 'features' can be used to query which features
|
||||
* are supported by the installed criu/kernel
|
||||
* via RPC.
|
||||
*/
|
||||
optional criu_features features = 5;
|
||||
}
|
||||
|
||||
/*
|
||||
* Response -- it states whether the request was served
|
||||
* and additional request-specific information
|
||||
*/
|
||||
|
||||
message criu_resp {
|
||||
required criu_req_type type = 1;
|
||||
required bool success = 2;
|
||||
|
||||
optional criu_dump_resp dump = 3;
|
||||
optional criu_restore_resp restore = 4;
|
||||
optional criu_notify notify = 5;
|
||||
optional criu_page_server_info ps = 6;
|
||||
|
||||
optional int32 cr_errno = 7;
|
||||
optional criu_features features = 8;
|
||||
optional string cr_errmsg = 9;
|
||||
}
|
70
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
Normal file
70
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
package libcontainer
|
||||
|
||||
import "io"
|
||||
|
||||
// ErrorCode is the API error code type.
|
||||
type ErrorCode int
|
||||
|
||||
// API error codes.
|
||||
const (
|
||||
// Factory errors
|
||||
IdInUse ErrorCode = iota
|
||||
InvalidIdFormat
|
||||
|
||||
// Container errors
|
||||
ContainerNotExists
|
||||
ContainerPaused
|
||||
ContainerNotStopped
|
||||
ContainerNotRunning
|
||||
ContainerNotPaused
|
||||
|
||||
// Process errors
|
||||
NoProcessOps
|
||||
|
||||
// Common errors
|
||||
ConfigInvalid
|
||||
ConsoleExists
|
||||
SystemError
|
||||
)
|
||||
|
||||
func (c ErrorCode) String() string {
|
||||
switch c {
|
||||
case IdInUse:
|
||||
return "Id already in use"
|
||||
case InvalidIdFormat:
|
||||
return "Invalid format"
|
||||
case ContainerPaused:
|
||||
return "Container paused"
|
||||
case ConfigInvalid:
|
||||
return "Invalid configuration"
|
||||
case SystemError:
|
||||
return "System error"
|
||||
case ContainerNotExists:
|
||||
return "Container does not exist"
|
||||
case ContainerNotStopped:
|
||||
return "Container is not stopped"
|
||||
case ContainerNotRunning:
|
||||
return "Container is not running"
|
||||
case ConsoleExists:
|
||||
return "Console exists for process"
|
||||
case ContainerNotPaused:
|
||||
return "Container is not paused"
|
||||
case NoProcessOps:
|
||||
return "No process operations"
|
||||
default:
|
||||
return "Unknown error"
|
||||
}
|
||||
}
|
||||
|
||||
// Error is the API error type.
|
||||
type Error interface {
|
||||
error
|
||||
|
||||
// Returns an error if it failed to write the detail of the Error to w.
|
||||
// The detail of the Error may include the error message and a
|
||||
// representation of the stack trace.
|
||||
Detail(w io.Writer) error
|
||||
|
||||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
44
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
Normal file
44
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Factory interface {
|
||||
// Creates a new container with the given id and starts the initial process inside it.
|
||||
// id must be a string containing only letters, digits and underscores and must contain
|
||||
// between 1 and 1024 characters, inclusive.
|
||||
//
|
||||
// The id must not already be in use by an existing container. Containers created using
|
||||
// a factory with the same path (and filesystem) must have distinct ids.
|
||||
//
|
||||
// Returns the new container with a running process.
|
||||
//
|
||||
// errors:
|
||||
// IdInUse - id is already in use by a container
|
||||
// InvalidIdFormat - id has incorrect format
|
||||
// ConfigInvalid - config is invalid
|
||||
// SystemError - System error
|
||||
//
|
||||
// On error, any partially created container parts are cleaned up (the operation is atomic).
|
||||
Create(id string, config *configs.Config) (Container, error)
|
||||
|
||||
// Load takes an ID for an existing container and returns the container information
|
||||
// from the state. This presents a read only view of the container.
|
||||
//
|
||||
// errors:
|
||||
// Path does not exist
|
||||
// System error
|
||||
Load(id string) (Container, error)
|
||||
|
||||
// StartInitialization is an internal API to libcontainer used during the reexec of the
|
||||
// container.
|
||||
//
|
||||
// Errors:
|
||||
// Pipe connection error
|
||||
// System error
|
||||
StartInitialization() error
|
||||
|
||||
// Type returns info string about factory type (e.g. lxc, libcontainer...)
|
||||
Type() string
|
||||
}
|
325
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
Normal file
325
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,325 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
stateFilename = "state.json"
|
||||
execFifoFilename = "exec.fifo"
|
||||
)
|
||||
|
||||
// idRegex matches a well-formed container ID: one or more word characters
// (letters, digits, underscore), plus signs, hyphens or periods.
//
// The hyphen is escaped so that it is a literal. Written as `+-\.` it forms
// the range from '+' to '.', which also admits ','.
var idRegex = regexp.MustCompile(`^[\w+\-.]+$`)
|
||||
|
||||
// InitArgs returns an options func to configure a LinuxFactory with the
|
||||
// provided init binary path and arguments.
|
||||
func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) (err error) {
|
||||
if len(args) > 0 {
|
||||
// Resolve relative paths to ensure that its available
|
||||
// after directory changes.
|
||||
if args[0], err = filepath.Abs(args[0]); err != nil {
|
||||
return newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
}
|
||||
|
||||
l.InitArgs = args
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &systemd.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to
|
||||
// create and manage cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootlessCgroups is an options func to configure a LinuxFactory to
|
||||
// return containers that use the "rootless" cgroup manager, which will
|
||||
// fail to do any operations not possible to do with an unprivileged user.
|
||||
// It should only be used in conjunction with rootless containers.
|
||||
func RootlessCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &rootless.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !mounted {
|
||||
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CriuPath returns an option func to configure a LinuxFactory with the
|
||||
// provided criupath
|
||||
func CriuPath(criupath string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
l.CriuPath = criupath
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory and
|
||||
// configures the factory with the provided option funcs.
|
||||
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
if root != "" {
|
||||
if err := os.MkdirAll(root, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
}
|
||||
l := &LinuxFactory{
|
||||
Root: root,
|
||||
InitArgs: []string{"/proc/self/exe", "init"},
|
||||
Validator: validate.New(),
|
||||
CriuPath: "criu",
|
||||
}
|
||||
Cgroupfs(l)
|
||||
for _, opt := range options {
|
||||
if err := opt(l); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// LinuxFactory implements the default factory interface for linux based systems.
|
||||
type LinuxFactory struct {
|
||||
// Root directory for the factory to store state.
|
||||
Root string
|
||||
|
||||
// InitArgs are arguments for calling the init responsibilities for spawning
|
||||
// a container.
|
||||
InitArgs []string
|
||||
|
||||
// CriuPath is the path to the criu binary used for checkpoint and restore of
|
||||
// containers.
|
||||
CriuPath string
|
||||
|
||||
// Validator provides validation to container configurations.
|
||||
Validator validate.Validator
|
||||
|
||||
// NewCgroupsManager returns an initialized cgroups manager for a single container.
|
||||
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
|
||||
} else if !os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.MkdirAll(containerRoot, 0711); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if config.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
state, err := l.loadState(containerRoot, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := &nonChildProcess{
|
||||
processPid: state.InitProcessPid,
|
||||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
// We have to use the RootlessManager.
|
||||
if state.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
initProcessStartTime: state.InitProcessStartTime,
|
||||
id: id,
|
||||
config: &state.Config,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
|
||||
root: containerRoot,
|
||||
created: state.Created,
|
||||
}
|
||||
c.state = &loadedState{c: c}
|
||||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Type returns the factory type string, which is always "libcontainer".
func (l *LinuxFactory) Type() string {
|
||||
return "libcontainer"
|
||||
}
|
||||
|
||||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
var (
|
||||
pipefd, rootfd int
|
||||
consoleSocket *os.File
|
||||
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
|
||||
envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR")
|
||||
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
|
||||
)
|
||||
|
||||
// Get the INITPIPE.
|
||||
pipefd, err = strconv.Atoi(envInitPipe)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
|
||||
}
|
||||
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||||
)
|
||||
defer pipe.Close()
|
||||
|
||||
// Only init processes have STATEDIR.
|
||||
rootfd = -1
|
||||
if it == initStandard {
|
||||
if rootfd, err = strconv.Atoi(envStateDir); err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err)
|
||||
}
|
||||
}
|
||||
|
||||
if envConsole != "" {
|
||||
console, err := strconv.Atoi(envConsole)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
|
||||
}
|
||||
consoleSocket = os.NewFile(uintptr(console), "console-socket")
|
||||
defer consoleSocket.Close()
|
||||
}
|
||||
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
|
||||
defer func() {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, rootfd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
||||
f, err := os.Open(filepath.Join(root, stateFilename))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
|
||||
}
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
var state *State
|
||||
if err := json.NewDecoder(f).Decode(&state); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) validateID(id string) error {
|
||||
if !idRegex.MatchString(id) {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
Normal file
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
Normal file
|
@ -0,0 +1,92 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
Message: {{.Message}}
|
||||
{{end}}
|
||||
Frames:{{range $i, $frame := .Stack.Frames}}
|
||||
---
|
||||
{{$i}}: {{$frame.Function}}
|
||||
Package: {{$frame.Package}}
|
||||
File: {{$frame.File}}@{{$frame.Line}}{{end}}
|
||||
`))
|
||||
|
||||
func newGenericError(err error, c ErrorCode) Error {
|
||||
if le, ok := err.(Error); ok {
|
||||
return le
|
||||
}
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: c,
|
||||
Stack: stacktrace.Capture(1),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
// newSystemError wraps err in an Error with the SystemError code and an
// empty cause. It calls createSystemError directly, which skips a fixed
// number of stack frames when capturing the trace.
func newSystemError(err error) Error {
|
||||
return createSystemError(err, "")
|
||||
}
|
||||
|
||||
// newSystemErrorWithCausef wraps err in an Error with the SystemError code;
// the cause is built with fmt.Sprintf from the format string cause and the
// arguments v. It calls createSystemError directly, which skips a fixed
// number of stack frames when capturing the trace.
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
|
||||
return createSystemError(err, fmt.Sprintf(cause, v...))
|
||||
}
|
||||
|
||||
// newSystemErrorWithCause wraps err in an Error with the SystemError code
// and the given cause, used verbatim. It calls createSystemError directly,
// which skips a fixed number of stack frames when capturing the trace.
func newSystemErrorWithCause(err error, cause string) Error {
|
||||
return createSystemError(err, cause)
|
||||
}
|
||||
|
||||
// createSystemError creates the specified error with the correct number of
|
||||
// stack frames skipped. This is only to be called by the other functions for
|
||||
// formatting the error.
|
||||
func createSystemError(err error, cause string) Error {
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: SystemError,
|
||||
Cause: cause,
|
||||
Stack: stacktrace.Capture(2),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
type genericError struct {
|
||||
Timestamp time.Time
|
||||
ECode ErrorCode
|
||||
Err error `json:"-"`
|
||||
Cause string
|
||||
Message string
|
||||
Stack stacktrace.Stacktrace
|
||||
}
|
||||
|
||||
func (e *genericError) Error() string {
|
||||
if e.Cause == "" {
|
||||
return e.Message
|
||||
}
|
||||
frame := e.Stack.Frames[0]
|
||||
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
|
||||
}
|
||||
|
||||
// Code returns the ErrorCode stored in the error.
func (e *genericError) Code() ErrorCode {
|
||||
return e.ECode
|
||||
}
|
||||
|
||||
// Detail renders the error through errorTemplate and writes the result to w.
// It returns whatever error the template execution reports.
func (e *genericError) Detail(w io.Writer) error {
|
||||
return errorTemplate.Execute(w, e)
|
||||
}
|
502
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
Normal file
502
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,502 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
"unsafe"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// initType names the kind of init process that newContainerInit should build.
type initType string

// The init kinds understood by newContainerInit.
const (
	// initSetns selects linuxSetnsInit.
	initSetns initType = "setns"
	// initStandard selects linuxStandardInit.
	initStandard initType = "standard"
)
|
||||
|
||||
// pid is a JSON-serialisable wrapper around a process id; it encodes as
// {"pid": <n>}.
type pid struct {
	Pid int `json:"pid"`
}
|
||||
|
||||
// network is an internal struct used to setup container networks.
|
||||
type network struct {
|
||||
configs.Network
|
||||
|
||||
// TempVethPeerName is a unique temporary veth peer name that was placed into
|
||||
// the container's namespace.
|
||||
TempVethPeerName string `json:"temp_veth_peer_name"`
|
||||
}
|
||||
|
||||
// initConfig is used for transferring parameters from Exec() to Init()
|
||||
type initConfig struct {
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Cwd string `json:"cwd"`
|
||||
Capabilities *configs.Capabilities `json:"capabilities"`
|
||||
ProcessLabel string `json:"process_label"`
|
||||
AppArmorProfile string `json:"apparmor_profile"`
|
||||
NoNewPrivileges bool `json:"no_new_privileges"`
|
||||
User string `json:"user"`
|
||||
AdditionalGroups []string `json:"additional_groups"`
|
||||
Config *configs.Config `json:"config"`
|
||||
Networks []*network `json:"network"`
|
||||
PassedFilesCount int `json:"passed_files_count"`
|
||||
ContainerId string `json:"containerid"`
|
||||
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||
CreateConsole bool `json:"create_console"`
|
||||
Rootless bool `json:"rootless"`
|
||||
}
|
||||
|
||||
// initer is implemented by every init flavour returned from
// newContainerInit.
type initer interface {
	// Init runs the initialisation and reports any failure.
	Init() error
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDirFD int) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := populateProcessEnvironment(config.Env); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
return &linuxSetnsInit{
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
config: config,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
parentPid: unix.Getppid(),
|
||||
config: config,
|
||||
stateDirFD: stateDirFD,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment applies each "KEY=VALUE" entry of env to the
// current process's environment, in order.
//
// Only the first '=' separates key from value, so values may themselves
// contain '='. An entry without any '=' aborts with an "invalid environment"
// error; entries processed before the failing one remain set. Errors from
// os.Setenv are returned unchanged.
func populateProcessEnvironment(env []string) error {
	for i := range env {
		entry := env[i]
		sep := strings.Index(entry, "=")
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", entry)
		}
		key, value := entry[:sep], entry[sep+1:]
		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaked file descriptors
|
||||
// before executing the command inside the namespace
|
||||
func finalizeNamespace(config *initConfig) error {
|
||||
// Ensure that all unwanted fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
capabilities := &configs.Capabilities{}
|
||||
if config.Capabilities != nil {
|
||||
capabilities = config.Capabilities
|
||||
} else if config.Config.Capabilities != nil {
|
||||
capabilities = config.Config.Capabilities
|
||||
}
|
||||
w, err := newContainerCapList(capabilities)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.ApplyBoundingSet(); err != nil {
|
||||
return err
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.ApplyCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupConsole sets up the console from inside the container, and sends the
|
||||
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
|
||||
// consoles are scoped to a container properly (see runc#814 and the many
|
||||
// issues related to that). This has to be run *after* we've pivoted to the new
|
||||
// rootfs (and the users' configuration is entirely set up).
|
||||
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
|
||||
defer socket.Close()
|
||||
// At this point, /dev/ptmx points to something that we would expect. We
|
||||
// used to change the owner of the slave path, but since the /dev/pts mount
|
||||
// can have gid=X set (at the users' option). So touching the owner of the
|
||||
// slave PTY is not necessary, as the kernel will handle that for us. Note
|
||||
// however, that setupUser (specifically fixStdioPermissions) *will* change
|
||||
// the UID owner of the console to be the user the process will run as (so
|
||||
// they can actually control their console).
|
||||
console, err := newConsole()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// After we return from here, we don't need the console anymore.
|
||||
defer console.Close()
|
||||
|
||||
linuxConsole, ok := console.(*linuxConsole)
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to cast console to *linuxConsole")
|
||||
}
|
||||
// Mount the console inside our rootfs.
|
||||
if mount {
|
||||
if err := linuxConsole.mount(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// While we can access console.master, using the API is a good idea.
|
||||
if err := utils.SendFd(socket, linuxConsole.File()); err != nil {
|
||||
return err
|
||||
}
|
||||
// Now, dup over all the things.
|
||||
return linuxConsole.dupStdio()
|
||||
}
|
||||
|
||||
// syncParentReady sends to the given pipe a JSON payload which indicates that
|
||||
// the init is ready to Exec the child process. It then waits for the parent to
|
||||
// indicate that it is cleared to Exec.
|
||||
func syncParentReady(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := writeSync(pipe, procReady); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procRun); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncParentHooks sends to the given pipe a JSON payload which indicates that
|
||||
// the parent should execute pre-start hooks. It then waits for the parent to
|
||||
// indicate that it is cleared to resume.
|
||||
func syncParentHooks(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := writeSync(pipe, procHooks); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procResume); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
Home: "/",
|
||||
}
|
||||
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var addGroups []int
|
||||
if len(config.AdditionalGroups) > 0 {
|
||||
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if config.Rootless {
|
||||
if execUser.Uid != 0 {
|
||||
return fmt.Errorf("cannot run as a non-root user in a rootless container")
|
||||
}
|
||||
|
||||
if execUser.Gid != 0 {
|
||||
return fmt.Errorf("cannot run as a non-root group in a rootless container")
|
||||
}
|
||||
|
||||
// We cannot set any additional groups in a rootless container and thus we
|
||||
// bail if the user asked us to do so. TODO: We currently can't do this
|
||||
// earlier, but if libcontainer.Process.User was typesafe this might work.
|
||||
if len(addGroups) > 0 {
|
||||
return fmt.Errorf("cannot set any additional groups in a rootless container")
|
||||
}
|
||||
}
|
||||
|
||||
// before we change to the container's user make sure that the processes STDIO
|
||||
// is correctly owned by the user that we are switching to.
|
||||
if err := fixStdioPermissions(config, execUser); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
|
||||
// There's nothing we can do about /etc/group entries, so we silently
|
||||
// ignore setting groups here (since the user didn't explicitly ask us to
|
||||
// set the group).
|
||||
if !config.Rootless {
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := unix.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
|
||||
// The ownership needs to match because it is created outside of the container and needs to be
|
||||
// localized.
|
||||
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
var null unix.Stat_t
|
||||
if err := unix.Stat("/dev/null", &null); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fd := range []uintptr{
|
||||
os.Stdin.Fd(),
|
||||
os.Stderr.Fd(),
|
||||
os.Stdout.Fd(),
|
||||
} {
|
||||
var s unix.Stat_t
|
||||
if err := unix.Fstat(int(fd), &s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip chown of /dev/null if it was used as one of the STDIO fds.
|
||||
if s.Rdev == null.Rdev {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip chown if s.Gid is actually an unmapped gid in the host. While
|
||||
// this is a bit dodgy if it just so happens that the console _is_
|
||||
// owned by overflow_gid, there's no way for us to disambiguate this as
|
||||
// a userspace program.
|
||||
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// We only change the uid owner (as it is possible for the mount to
|
||||
// prefer a different gid, and there's no reason for us to change it).
|
||||
// The reason why we don't just leave the default uid=X mount setup is
|
||||
// that users expect to be able to actually use their console. Without
|
||||
// this code, you couldn't effectively run as a non-root user inside a
|
||||
// container and also have a console set up.
|
||||
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupNetwork sets up and initializes any network interface inside the container.
|
||||
func setupNetwork(config *initConfig) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.initialize(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
_, dst, err := net.ParseCIDR(config.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
src := net.ParseIP(config.Source)
|
||||
if src == nil {
|
||||
return fmt.Errorf("Invalid source for route: %s", config.Source)
|
||||
}
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if gw == nil {
|
||||
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
|
||||
}
|
||||
l, err := netlink.LinkByName(config.InterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
route := &netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Gw: gw,
|
||||
LinkIndex: l.Attrs().Index,
|
||||
}
|
||||
if err := netlink.RouteAdd(route); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(limits []configs.Rlimit, pid int) error {
|
||||
for _, rlimit := range limits {
|
||||
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// _P_PID is the id-type argument that isWaitable passes to the waitid
// system call.
const _P_PID = 1
|
||||
|
||||
// siginfo is the buffer handed to the waitid system call by isWaitable.
//
// The explicit fields take 16 bytes; pad extends the struct to a total of
// 128 bytes, the size used by blockUntilWaitable in the Go runtime (the
// previous 96-byte pad produced only 112 bytes, contradicting this intent).
//
// NOTE(review): on 64-bit Linux the union following si_code is 8-byte
// aligned, so the kernel's si_pid presumably lives at offset 16 rather than
// 12 — confirm before relying on si_pid on such platforms.
type siginfo struct {
	si_signo int32
	si_errno int32
	si_code  int32
	// below here is a union; si_pid is the only field we use
	si_pid int32
	// Pad to 128 bytes as detailed in blockUntilWaitable
	pad [112]byte
}
|
||||
|
||||
// isWaitable returns true if the process has exited false otherwise.
|
||||
// Its based off blockUntilWaitable in src/os/wait_waitid.go
|
||||
func isWaitable(pid int) (bool, error) {
|
||||
si := &siginfo{}
|
||||
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
|
||||
if e != 0 {
|
||||
return false, os.NewSyscallError("waitid", e)
|
||||
}
|
||||
|
||||
return si.si_pid != 0, nil
|
||||
}
|
||||
|
||||
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
|
||||
func isNoChildren(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case syscall.Errno:
|
||||
if err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
case *os.SyscallError:
|
||||
if err.Err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// signalAllProcesses freezes then iterates over all the processes inside the
|
||||
// manager's cgroups sending the signal s to them.
|
||||
// If s is SIGKILL then it will wait for each process to exit.
|
||||
// For all other signals it will check if the process is ready to report its
|
||||
// exit status and only if it is will a wait be performed.
|
||||
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
|
||||
var procs []*os.Process
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := m.GetAllPids()
|
||||
if err != nil {
|
||||
m.Freeze(configs.Thawed)
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
p, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
continue
|
||||
}
|
||||
procs = append(procs, p)
|
||||
if err := p.Signal(s); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
if err := m.Freeze(configs.Thawed); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
if s != unix.SIGKILL {
|
||||
if ok, err := isWaitable(p.Pid); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
logrus.Warn("signalAllProcesses: ", p.Pid, err)
|
||||
}
|
||||
continue
|
||||
} else if !ok {
|
||||
// Not ready to report so don't wait
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := p.Wait(); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
logrus.Warn("wait: ", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
50
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
Normal file
50
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
// +build linux
|
||||
|
||||
package keys
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// KeySerial is the numeric identifier of a kernel key or keyring as used by
// the functions in this package.
type KeySerial uint32
|
||||
|
||||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("could not create session key: %v", err)
|
||||
}
|
||||
return KeySerial(sessKeyId), nil
|
||||
}
|
||||
|
||||
// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
|
||||
// anding the bits with the given mask (clearing permissions) and setting
|
||||
// additional permission bits
|
||||
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
||||
dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
res := strings.Split(string(dest), ";")
|
||||
if len(res) < 5 {
|
||||
return fmt.Errorf("Destination buffer for key description is too small")
|
||||
}
|
||||
|
||||
// parse permissions
|
||||
perm64, err := strconv.ParseUint(res[3], 16, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
perm := (uint32(perm64) & mask) | setbits
|
||||
|
||||
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
87
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
Normal file
87
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Message and attribute types sent to the bootstrap program. The numbers
// were chosen arbitrarily so that they do not conflict with known netlink
// types.
const (
	// InitMsg is the type of the message itself.
	InitMsg uint16 = 62000

	// Attribute types carried inside the message.
	CloneFlagsAttr  uint16 = 27281
	NsPathsAttr     uint16 = 27282
	UidmapAttr      uint16 = 27283
	GidmapAttr      uint16 = 27284
	SetgroupAttr    uint16 = 27285
	OomScoreAdjAttr uint16 = 27286
	RootlessAttr    uint16 = 27287
)
|
||||
|
||||
// Int32msg is a netlink attribute carrying a single 32-bit value.
type Int32msg struct {
	// Type is written into the attribute header as the attribute type.
	Type uint16
	// Value is the 32-bit payload.
	Value uint32
}
|
||||
|
||||
// Serialize serializes the message.
|
||||
// Int32msg has the following representation
|
||||
// | nlattr len | nlattr type |
|
||||
// | uint32 value |
|
||||
func (msg *Int32msg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
native.PutUint32(buf[4:8], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Int32msg) Len() int {
|
||||
return unix.NLA_HDRLEN + 4
|
||||
}
|
||||
|
||||
// Bytemsg is a netlink attribute carrying an arbitrary byte payload.
// Bytemsg has the following representation
// | nlattr len | nlattr type |
// | value              | pad |
type Bytemsg struct {
	// Type is written into the attribute header as the attribute type.
	Type uint16
	// Value is the payload copied after the header.
	Value []byte
}
|
||||
|
||||
func (msg *Bytemsg) Serialize() []byte {
|
||||
l := msg.Len()
|
||||
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(l))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
copy(buf[4:], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Bytemsg) Len() int {
|
||||
return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
||||
}
|
||||
|
||||
// Boolmsg is a netlink attribute carrying a single boolean, encoded as one
// byte (see Serialize).
type Boolmsg struct {
	// Type is written into the attribute header as the attribute type.
	Type uint16
	// Value is the boolean payload.
	Value bool
}
|
||||
|
||||
func (msg *Boolmsg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
if msg.Value {
|
||||
buf[4] = 1
|
||||
} else {
|
||||
buf[4] = 0
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Boolmsg) Len() int {
|
||||
return unix.NLA_HDRLEN + 1
|
||||
}
|
259
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
Normal file
259
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,259 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
var strategies = map[string]networkStrategy{
|
||||
"veth": &veth{},
|
||||
"loopback": &loopback{},
|
||||
}
|
||||
|
||||
// networkStrategy represents a specific network configuration for
|
||||
// a container's networking stack
|
||||
type networkStrategy interface {
|
||||
create(*network, int) error
|
||||
initialize(*network) error
|
||||
detach(*configs.Network) error
|
||||
attach(*configs.Network) error
|
||||
}
|
||||
|
||||
// getStrategy returns the specific network strategy for the
|
||||
// provided type.
|
||||
func getStrategy(tpe string) (networkStrategy, error) {
|
||||
s, exists := strategies[tpe]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("unknown strategy type %q", tpe)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
|
||||
out := &NetworkInterface{Name: interfaceName}
|
||||
// This can happen if the network runtime information is missing - possible if the
|
||||
// container was created by an old version of libcontainer.
|
||||
if interfaceName == "" {
|
||||
return out, nil
|
||||
}
|
||||
type netStatsPair struct {
|
||||
// Where to write the output.
|
||||
Out *uint64
|
||||
// The network stats file to read.
|
||||
File string
|
||||
}
|
||||
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
|
||||
netStats := []netStatsPair{
|
||||
{Out: &out.RxBytes, File: "tx_bytes"},
|
||||
{Out: &out.RxPackets, File: "tx_packets"},
|
||||
{Out: &out.RxErrors, File: "tx_errors"},
|
||||
{Out: &out.RxDropped, File: "tx_dropped"},
|
||||
|
||||
{Out: &out.TxBytes, File: "rx_bytes"},
|
||||
{Out: &out.TxPackets, File: "rx_packets"},
|
||||
{Out: &out.TxErrors, File: "rx_errors"},
|
||||
{Out: &out.TxDropped, File: "rx_dropped"},
|
||||
}
|
||||
for _, netStat := range netStats {
|
||||
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*(netStat.Out) = data
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// readSysfsNetworkStats reads the counter statsFile from
// /sys/class/net/<ethInterface>/statistics and parses it as a base-10
// unsigned 64-bit integer, ignoring surrounding whitespace. Read and parse
// errors are returned unchanged.
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
	statPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
	raw, err := ioutil.ReadFile(statPath)
	if err != nil {
		return 0, err
	}
	text := strings.TrimSpace(string(raw))
	return strconv.ParseUint(text, 10, 64)
}
|
||||
|
||||
// loopback is a network strategy that provides a basic loopback device
|
||||
type loopback struct {
|
||||
}
|
||||
|
||||
func (l *loopback) create(n *network, nspid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) initialize(config *network) error {
|
||||
return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}})
|
||||
}
|
||||
|
||||
func (l *loopback) attach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) detach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// veth is a network strategy that uses a bridge and creates a veth pair: one
// end is attached to the bridge on the host, the other is placed inside the
// container's namespace.
type veth struct{}
|
||||
|
||||
func (v *veth) detach(n *configs.Network) (err error) {
|
||||
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
|
||||
}
|
||||
|
||||
// attach a container network interface to an external network
|
||||
func (v *veth) attach(n *configs.Network) (err error) {
|
||||
brl, err := netlink.LinkByName(n.Bridge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
br, ok := brl.(*netlink.Bridge)
|
||||
if !ok {
|
||||
return fmt.Errorf("Wrong device type %T", brl)
|
||||
}
|
||||
host, err := netlink.LinkByName(n.HostInterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := netlink.LinkSetMaster(host, br); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.HairpinMode {
|
||||
if err := netlink.LinkSetHairpin(host, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetUp(host); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *veth) create(n *network, nspid int) (err error) {
|
||||
tmpName, err := v.generateTempPeerName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.TempVethPeerName = tmpName
|
||||
if n.Bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
veth := &netlink.Veth{
|
||||
LinkAttrs: netlink.LinkAttrs{
|
||||
Name: n.HostInterfaceName,
|
||||
TxQLen: n.TxQueueLen,
|
||||
},
|
||||
PeerName: n.TempVethPeerName,
|
||||
}
|
||||
if err := netlink.LinkAdd(veth); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
netlink.LinkDel(veth)
|
||||
}
|
||||
}()
|
||||
if err := v.attach(&n.Network); err != nil {
|
||||
return err
|
||||
}
|
||||
child, err := netlink.LinkByName(n.TempVethPeerName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.LinkSetNsPid(child, nspid)
|
||||
}
|
||||
|
||||
func (v *veth) generateTempPeerName() (string, error) {
|
||||
return utils.GenerateRandomName("veth", 7)
|
||||
}
|
||||
|
||||
func (v *veth) initialize(config *network) error {
|
||||
peer := config.TempVethPeerName
|
||||
if peer == "" {
|
||||
return fmt.Errorf("peer is not specified")
|
||||
}
|
||||
child, err := netlink.LinkByName(peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetDown(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetName(child, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
// get the interface again after we changed the name as the index also changes.
|
||||
if child, err = netlink.LinkByName(config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
mac, err := net.ParseMAC(config.MacAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ip, err := netlink.ParseAddr(config.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
ip6, err := netlink.ParseAddr(config.IPv6Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip6); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetUp(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
gw := net.ParseIP(config.IPv6Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
90
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
Normal file
90
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// oomCgroupName is the key under which the memory cgroup directory is looked
// up in the cgroup path maps passed to notifyOnOOM and notifyMemoryPressure.
const oomCgroupName = "memory"
|
||||
|
||||
// PressureLevel selects the memory pressure level to be notified about.
type PressureLevel uint

// Supported pressure levels, in increasing order of severity.
const (
	LowPressure PressureLevel = iota
	MediumPressure
	CriticalPressure
)
|
||||
|
||||
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
|
||||
evFile, err := os.Open(filepath.Join(cgDir, evName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
|
||||
if err != nil {
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
eventfd := os.NewFile(uintptr(fd), "eventfd")
|
||||
|
||||
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
|
||||
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer func() {
|
||||
close(ch)
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
}()
|
||||
buf := make([]byte, 8)
|
||||
for {
|
||||
if _, err := eventfd.Read(buf); err != nil {
|
||||
return
|
||||
}
|
||||
// When a cgroup is destroyed, an event is sent to eventfd.
|
||||
// So if the control path is gone, return instead of notifying.
|
||||
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
|
||||
return
|
||||
}
|
||||
ch <- struct{}{}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
return registerMemoryEvent(dir, "memory.oom_control", "")
|
||||
}
|
||||
|
||||
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
if level > CriticalPressure {
|
||||
return nil, fmt.Errorf("invalid pressure level %d", level)
|
||||
}
|
||||
|
||||
levelStr := []string{"low", "medium", "critical"}[level]
|
||||
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
|
||||
}
|
106
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
Normal file
106
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
Normal file
|
@ -0,0 +1,106 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// processOperations is the set of low-level operations that back the
// exported Wait, Signal and Pid methods of Process.
type processOperations interface {
	// wait waits for the process and returns its state.
	wait() (*os.ProcessState, error)
	// signal delivers sig to the process.
	signal(sig os.Signal) error
	// pid returns the process ID.
	pid() int
}
|
||||
|
||||
// Process specifies the configuration and IO for a process inside
|
||||
// a container.
|
||||
type Process struct {
|
||||
// The command to be run followed by any arguments.
|
||||
Args []string
|
||||
|
||||
// Env specifies the environment variables for the process.
|
||||
Env []string
|
||||
|
||||
// User will set the uid and gid of the executing process running inside the container
|
||||
// local to the container's user and group configuration.
|
||||
User string
|
||||
|
||||
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
||||
// in addition to those that the user belongs to.
|
||||
AdditionalGroups []string
|
||||
|
||||
// Cwd will change the processes current working directory inside the container's rootfs.
|
||||
Cwd string
|
||||
|
||||
// Stdin is a pointer to a reader which provides the standard input stream.
|
||||
Stdin io.Reader
|
||||
|
||||
// Stdout is a pointer to a writer which receives the standard output stream.
|
||||
Stdout io.Writer
|
||||
|
||||
// Stderr is a pointer to a writer which receives the standard error stream.
|
||||
Stderr io.Writer
|
||||
|
||||
// ExtraFiles specifies additional open files to be inherited by the container
|
||||
ExtraFiles []*os.File
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *configs.Capabilities
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process and is
|
||||
// changed at the time the process is execed
|
||||
AppArmorProfile string
|
||||
|
||||
// Label specifies the label to apply to the process. It is commonly used by selinux
|
||||
Label string
|
||||
|
||||
// NoNewPrivileges controls whether processes can gain additional privileges.
|
||||
NoNewPrivileges *bool
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []configs.Rlimit
|
||||
|
||||
// ConsoleSocket provides the masterfd console.
|
||||
ConsoleSocket *os.File
|
||||
|
||||
ops processOperations
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
// Wait releases any resources associated with the Process
|
||||
func (p Process) Wait() (*os.ProcessState, error) {
|
||||
if p.ops == nil {
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.wait()
|
||||
}
|
||||
|
||||
// Pid returns the process ID
|
||||
func (p Process) Pid() (int, error) {
|
||||
// math.MinInt32 is returned here, because it's invalid value
|
||||
// for the kill() system call.
|
||||
if p.ops == nil {
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.pid(), nil
|
||||
}
|
||||
|
||||
// Signal sends a signal to the Process.
|
||||
func (p Process) Signal(sig os.Signal) error {
|
||||
if p.ops == nil {
|
||||
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.signal(sig)
|
||||
}
|
||||
|
||||
// IO holds the caller's side of a process's STDIO streams: the writable end
// that feeds the process's stdin and the readable ends of its stdout and
// stderr.
type IO struct {
	Stdin  io.WriteCloser
	Stdout io.ReadCloser
	Stderr io.ReadCloser
}
|
493
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
Normal file
493
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,493 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // only for Signal
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// parentProcess is the common interface of the process handles defined in
// this package.
type parentProcess interface {
	// pid returns the pid for the running process.
	pid() int

	// start starts the process execution.
	start() error

	// terminate sends a SIGKILL to the process and waits for the exit.
	terminate() error

	// wait waits on the process returning the process state.
	wait() (*os.ProcessState, error)

	// startTime returns the process start time.
	startTime() (uint64, error)

	// signal delivers the given signal to the process.
	signal(os.Signal) error

	// externalDescriptors returns the descriptor names recorded for the
	// process.
	externalDescriptors() []string

	// setExternalDescriptors replaces the recorded descriptor names.
	setExternalDescriptors(fds []string)
}
|
||||
|
||||
type setnsProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
cgroupPaths map[string]string
|
||||
config *initConfig
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return unix.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *setnsProcess) start() (err error) {
|
||||
defer p.parentPipe.Close()
|
||||
err = p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
}
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
}
|
||||
if err = p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
// We can't join cgroups if we're in a rootless container.
|
||||
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for process")
|
||||
}
|
||||
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing config to pipe")
|
||||
}
|
||||
|
||||
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procReady:
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procReady in setns")
|
||||
case procHooks:
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procHooks in setns")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
}
|
||||
})
|
||||
|
||||
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return ierr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
func (p *setnsProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "waiting on setns process to finish")
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return newSystemError(&exec.ExitError{ProcessState: status})
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "reading pid from init pipe")
|
||||
}
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
p.process.ops = p
|
||||
return nil
|
||||
}
|
||||
|
||||
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
|
||||
// avoid the process becoming a zombie.
|
||||
func (p *setnsProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
|
||||
// Return actual ProcessState even on Wait error
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *setnsProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
type initProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
config *initConfig
|
||||
manager cgroups.Manager
|
||||
container *linuxContainer
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
sharePidns bool
|
||||
rootDir *os.File
|
||||
}
|
||||
|
||||
func (p *initProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *initProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
// This is called by initProcess.start function
|
||||
func (p *initProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return &exec.ExitError{ProcessState: status}
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
p.process.ops = p
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) start() error {
|
||||
defer p.parentPipe.Close()
|
||||
err := p.cmd.Start()
|
||||
p.process.ops = p
|
||||
p.childPipe.Close()
|
||||
p.rootDir.Close()
|
||||
if err != nil {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
}
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "running exec setns process for init")
|
||||
}
|
||||
// Save the standard descriptor names before the container process
|
||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||
// we won't know at checkpoint time which file descriptor to look up.
|
||||
fds, err := getPipeFds(p.pid())
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
}
|
||||
}()
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemErrorWithCause(err, "sending config to init process")
|
||||
}
|
||||
var (
|
||||
sentRun bool
|
||||
sentResume bool
|
||||
)
|
||||
|
||||
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procReady:
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for ready process")
|
||||
}
|
||||
// call prestart hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
|
||||
if p.config.Config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
|
||||
}
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := writeSync(p.parentPipe, procRun); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'run'")
|
||||
}
|
||||
sentRun = true
|
||||
case procHooks:
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
|
||||
}
|
||||
if p.config.Config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
|
||||
}
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := writeSync(p.parentPipe, procResume); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'resume'")
|
||||
}
|
||||
sentResume = true
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if !sentRun {
|
||||
return newSystemErrorWithCause(ierr, "container init")
|
||||
}
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
|
||||
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
|
||||
}
|
||||
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "shutting down init pipe")
|
||||
}
|
||||
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return ierr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
if err != nil {
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
||||
if p.sharePidns {
|
||||
signalAllProcesses(p.manager, unix.SIGKILL)
|
||||
}
|
||||
return p.cmd.ProcessState, nil
|
||||
}
|
||||
|
||||
func (p *initProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *initProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the config to the container's init process, we don't use JSON Encode
|
||||
// here because there might be a problem in JSON decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
return utils.WriteJSON(p.parentPipe, p.config)
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
for _, config := range p.config.Config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n := &network{
|
||||
Network: *config,
|
||||
}
|
||||
if err := strategy.create(n, p.pid()); err != nil {
|
||||
return err
|
||||
}
|
||||
p.config.Networks = append(p.config.Networks, n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return unix.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *initProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
// getPipeFds resolves the symlinks /proc/<pid>/fd/{0,1,2} and returns their
// targets in a three-element slice indexed by descriptor number. A descriptor
// whose link cannot be read because of a permission error is left as an
// empty string; any other error aborts the scan and is returned together
// with the entries collected so far.
func getPipeFds(pid int) ([]string, error) {
	const stdioCount = 3
	fds := make([]string, stdioCount)

	fdDir := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
	for n := range fds {
		// XXX: This breaks if the path is not a valid symlink (which can
		//      happen in certain particularly unlucky mount namespace setups).
		target, err := os.Readlink(filepath.Join(fdDir, strconv.Itoa(n)))
		switch {
		case err == nil:
			fds[n] = target
		case os.IsPermission(err):
			// Ignore permission errors, for rootless containers and other
			// non-dumpable processes. if we can't get the fd for a particular
			// file, there's not much we can do.
		default:
			return fds, err
		}
	}
	return fds, nil
}
|
||||
|
||||
// InitializeIO creates pipes for use with the process's stdio and returns the
|
||||
// opposite side for each. Do not use this if you want to have a pseudoterminal
|
||||
// set up for you by libcontainer (TODO: fix that too).
|
||||
// TODO: This is mostly unnecessary, and should be handled by clients.
|
||||
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
||||
var fds []uintptr
|
||||
i = &IO{}
|
||||
// cleanup in case of an error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
for _, fd := range fds {
|
||||
unix.Close(int(fd))
|
||||
}
|
||||
}
|
||||
}()
|
||||
// STDIN
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdin, i.Stdin = r, w
|
||||
// STDOUT
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdout, i.Stdout = w, r
|
||||
// STDERR
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stderr, i.Stderr = w, r
|
||||
// change ownership of the pipes incase we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return i, nil
|
||||
}
|
122
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
Normal file
122
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
Normal file
|
@ -0,0 +1,122 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &restoredProcess{
|
||||
proc: proc,
|
||||
processStartTime: stat.StartTime,
|
||||
fds: fds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// restoredProcess is a process handle built by newRestoredProcess from an
// existing pid.
type restoredProcess struct {
	// proc is the handle used to kill, wait on and signal the process.
	proc *os.Process
	// processStartTime is the start time captured when the handle was built.
	processStartTime uint64
	// fds holds the external descriptor names.
	fds []string
}
|
||||
|
||||
func (p *restoredProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) pid() int {
|
||||
return p.proc.Pid
|
||||
}
|
||||
|
||||
func (p *restoredProcess) terminate() error {
|
||||
err := p.proc.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
// TODO: how do we wait on the actual process?
|
||||
// maybe use --exec-cmd in criu
|
||||
st, err := p.proc.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) signal(s os.Signal) error {
|
||||
return p.proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a
// container from a persisted state.
type nonChildProcess struct {
	// processPid is the pid of the process.
	processPid int
	// processStartTime is the recorded start time of the process.
	processStartTime uint64
	// fds holds the external descriptor names.
	fds []string
}
|
||||
|
||||
func (p *nonChildProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) pid() int {
|
||||
return p.processPid
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) terminate() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) signal(s os.Signal) error {
|
||||
proc, err := os.FindProcess(p.processPid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
812
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
Normal file
812
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,812 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/symlink"
|
||||
"github.com/mrunalp/fileutils"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// defaultMountFlags combines the MS_NOEXEC, MS_NOSUID and MS_NODEV mount flags.
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
|
||||
// needsSetupDev returns true if /dev needs to be set up.
|
||||
func needsSetupDev(config *configs.Config) bool {
|
||||
for _, m := range config.Mounts {
|
||||
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// prepareRootfs sets up the devices, mount points, and filesystems for use
|
||||
// inside a new mount namespace. It doesn't set anything as ro. You must call
|
||||
// finalizeRootfs after this function to finish setting up the rootfs.
|
||||
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||
}
|
||||
|
||||
setupDev := needsSetupDev(config)
|
||||
for _, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
if err := mountCmd(precmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running premount command")
|
||||
}
|
||||
}
|
||||
|
||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
|
||||
}
|
||||
|
||||
for _, postcmd := range m.PostmountCmds {
|
||||
if err := mountCmd(postcmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running postmount command")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := createDevices(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating device nodes")
|
||||
}
|
||||
if err := setupPtmx(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up ptmx")
|
||||
}
|
||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up /dev symlinks")
|
||||
}
|
||||
}
|
||||
|
||||
// Signal the parent to run the pre-start hooks.
|
||||
// The hooks are run after the mounts are setup, but before we switch to the new
|
||||
// root, so that the old root is still available in the hooks for any mount
|
||||
// manipulations.
|
||||
if err := syncParentHooks(pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The reason these operations are done here rather than in finalizeRootfs
|
||||
// is because the console-handling code gets quite sticky if we have to set
|
||||
// up the console before doing the pivot_root(2). This is because the
|
||||
// Console API has to also work with the ExecIn case, which means that the
|
||||
// API must be able to deal with being inside as well as outside the
|
||||
// container. It's just cleaner to do this here (at the expense of the
|
||||
// operation not being perfectly split).
|
||||
|
||||
if err := unix.Chdir(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
|
||||
}
|
||||
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else {
|
||||
err = pivotRoot(config.Rootfs)
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "jailing process inside rootfs")
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := reOpenDevNull(); err != nil {
|
||||
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeRootfs sets anything to ro if necessary. You must call
|
||||
// prepareRootfs first.
|
||||
func finalizeRootfs(config *configs.Config) (err error) {
|
||||
// remount dev as ro if specified
|
||||
for _, m := range config.Mounts {
|
||||
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||
if err := remountReadonly(m); err != nil {
|
||||
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// set rootfs ( / ) as readonly
|
||||
if config.Readonlyfs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rootfs as readonly")
|
||||
}
|
||||
}
|
||||
|
||||
unix.Umask(0022)
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountCmd(cmd configs.Command) error {
|
||||
command := exec.Command(cmd.Path, cmd.Args[:]...)
|
||||
command.Env = cmd.Env
|
||||
command.Dir = cmd.Dir
|
||||
if out, err := command.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
case "proc", "sysfs":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Selinux kernels do not support labeling of /proc or /sys
|
||||
return mountPropagate(m, rootfs, "")
|
||||
case "mqueue":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
// older kernels do not support labeling of /dev/mqueue
|
||||
if err := mountPropagate(m, rootfs, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
}
|
||||
return nil
|
||||
case "tmpfs":
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
tmpDir := ""
|
||||
stat, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if copyUp {
|
||||
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
m.Destination = tmpDir
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if copyUp {
|
||||
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
}
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case "bind":
|
||||
stat, err := os.Stat(m.Source)
|
||||
if err != nil {
|
||||
// error out if the source of a bind mount does not exist as we will be
|
||||
// unable to bind anything to it.
|
||||
return err
|
||||
}
|
||||
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
// bind mount won't change mount options, we need remount to make mount options effective.
|
||||
// first check that we have non-default options required before attempting a remount
|
||||
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
|
||||
// only remount if unique mount options are set
|
||||
if err := remount(m, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Relabel != "" {
|
||||
if err := label.Validate(m.Relabel); err != nil {
|
||||
return err
|
||||
}
|
||||
shared := label.IsShared(m.Relabel)
|
||||
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case "cgroup":
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var merged []string
|
||||
for _, b := range binds {
|
||||
ss := filepath.Base(b.Destination)
|
||||
if strings.Contains(ss, ",") {
|
||||
merged = append(merged, ss)
|
||||
}
|
||||
}
|
||||
tmpfs := &configs.Mount{
|
||||
Source: "tmpfs",
|
||||
Device: "tmpfs",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags,
|
||||
Data: "mode=755",
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
}
|
||||
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, b := range binds {
|
||||
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, mc := range merged {
|
||||
for _, ss := range strings.Split(mc, ",") {
|
||||
// symlink(2) is very dumb, it will just shove the path into
|
||||
// the link and doesn't do any checks or relative path
|
||||
// conversion. Also, don't error out if the cgroup already exists.
|
||||
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.Flags&unix.MS_RDONLY != 0 {
|
||||
// remount cgroup root as readonly
|
||||
mcgrouproot := &configs.Mount{
|
||||
Source: m.Destination,
|
||||
Device: "bind",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
|
||||
}
|
||||
if err := remount(mcgrouproot, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
// ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
var err error
|
||||
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return mountPropagate(m, rootfs, mountLabel)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
||||
mounts, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var binds []*configs.Mount
|
||||
|
||||
for _, mm := range mounts {
|
||||
dir, err := mm.GetOwnCgroup(cgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
relDir, err := filepath.Rel(mm.Root, dir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
binds = append(binds, &configs.Mount{
|
||||
Device: "bind",
|
||||
Source: filepath.Join(mm.Mountpoint, relDir),
|
||||
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
|
||||
Flags: unix.MS_BIND | unix.MS_REC | m.Flags,
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
})
|
||||
}
|
||||
|
||||
return binds, nil
|
||||
}
|
||||
|
||||
// checkMountDestination checks to ensure that the mount destination is not
// over the top of /proc. dest is required to be an absolute path with all
// symlinks resolved before calling this function.
//
// A small whitelist of individual /proc entries is permitted so that files
// emulated by fuse can be bind mounted over them. Any other destination that
// is /proc itself or lies below it is rejected with an error. An error is also
// returned when dest cannot be made relative to rootfs.
func checkMountDestination(rootfs, dest string) error {
	invalidDestinations := []string{
		"/proc",
	}
	// White list, it should be sub directories of invalid destinations
	validDestinations := []string{
		// These entries can be bind mounted by files emulated by fuse,
		// so commands like top, free displays stats in container.
		"/proc/cpuinfo",
		"/proc/diskstats",
		"/proc/meminfo",
		"/proc/stat",
		"/proc/swaps",
		"/proc/uptime",
		"/proc/net/dev",
	}
	for _, valid := range validDestinations {
		rel, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
		if err != nil {
			return err
		}
		if rel == "." {
			return nil
		}
	}
	parentPrefix := ".." + string(filepath.Separator)
	for _, invalid := range invalidDestinations {
		rel, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
		if err != nil {
			return err
		}
		// dest is outside the invalid tree only when the relative path climbs
		// out through a ".." component. A plain prefix test on ".." would
		// also match entries such as "..foo" that live inside the tree.
		outside := rel == ".." || strings.HasPrefix(rel, parentPrefix)
		if !outside {
			return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
		}
	}
	return nil
}
|
||||
|
||||
// setupDevSymlinks creates the conventional /dev symlinks (fd, stdin, stdout,
// stderr and, when the host has /proc/kcore, core) inside rootfs. A link that
// already exists is not treated as an error.
func setupDevSymlinks(rootfs string) error {
	type devLink struct {
		target string // what the symlink points at
		name   string // location of the symlink, relative to rootfs
	}
	wanted := []devLink{
		{"/proc/self/fd", "/dev/fd"},
		{"/proc/self/fd/0", "/dev/stdin"},
		{"/proc/self/fd/1", "/dev/stdout"},
		{"/proc/self/fd/2", "/dev/stderr"},
	}
	// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
	// in /dev if it exists in /proc.
	if _, statErr := os.Stat("/proc/kcore"); statErr == nil {
		wanted = append(wanted, devLink{"/proc/kcore", "/dev/core"})
	}
	for _, l := range wanted {
		linkPath := filepath.Join(rootfs, l.name)
		err := os.Symlink(l.target, linkPath)
		if err == nil || os.IsExist(err) {
			continue
		}
		return fmt.Errorf("symlink %s %s %s", l.target, linkPath, err)
	}
	return nil
}
|
||||
|
||||
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
|
||||
// this method will make them point to `/dev/null` in this container's rootfs. This
|
||||
// needs to be called after we chroot/pivot into the container's rootfs so that any
|
||||
// symlinks are resolved locally.
|
||||
func reOpenDevNull() error {
|
||||
var stat, devNullStat unix.Stat_t
|
||||
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return err
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err := unix.Fstat(fd, &stat); err != nil {
|
||||
return err
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
oldMask := unix.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
// containers running in a user namespace are not allowed to mknod
|
||||
// devices so we can just bind mount it from the host.
|
||||
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
|
||||
unix.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
}
|
||||
unix.Umask(oldMask)
|
||||
return nil
|
||||
}
|
||||
|
||||
func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mknodDevice(dest string, node *configs.Device) error {
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c', 'u':
|
||||
fileMode |= unix.S_IFCHR
|
||||
case 'b':
|
||||
fileMode |= unix.S_IFBLK
|
||||
case 'p':
|
||||
fileMode |= unix.S_IFIFO
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
|
||||
for _, m := range mountinfo {
|
||||
if m.Mountpoint == dir {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
// optional fields.
|
||||
func getParentMount(rootfs string) (string, string, error) {
|
||||
var path string
|
||||
|
||||
mountinfos, err := mount.GetMounts()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
mountinfo := getMountInfo(mountinfos, rootfs)
|
||||
if mountinfo != nil {
|
||||
return rootfs, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
path = rootfs
|
||||
for {
|
||||
path = filepath.Dir(path)
|
||||
|
||||
mountinfo = getMountInfo(mountinfos, path)
|
||||
if mountinfo != nil {
|
||||
return path, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
if path == "/" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we are here, we did not find parent mount. Something is wrong.
|
||||
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
||||
}
|
||||
|
||||
// Make parent mount private if it was shared
|
||||
func rootfsParentMountPrivate(rootfs string) error {
|
||||
sharedMount := false
|
||||
|
||||
parentMount, optionalOpts, err := getParentMount(rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
optsSplit := strings.Split(optionalOpts, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
sharedMount = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Make parent mount PRIVATE if it was shared. It is needed for two
|
||||
// reasons. First of all pivot_root() will fail if parent mount is
|
||||
// shared. Secondly when we bind mount rootfs it will propagate to
|
||||
// parent namespace and we don't want that to happen.
|
||||
if sharedMount {
|
||||
return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := unix.MS_SLAVE | unix.MS_REC
|
||||
if config.RootPropagation != 0 {
|
||||
flag = config.RootPropagation
|
||||
}
|
||||
if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make parent mount private to make sure following bind mount does
|
||||
// not propagate in other namespaces. Also it will help with kernel
|
||||
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
|
||||
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
func setReadonly() error {
|
||||
return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
func setupPtmx(config *configs.Config) error {
|
||||
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pivotRoot will call pivot_root such that rootfs becomes the new root
|
||||
// filesystem, and everything else is cleaned up.
|
||||
func pivotRoot(rootfs string) error {
|
||||
// While the documentation may claim otherwise, pivot_root(".", ".") is
|
||||
// actually valid. What this results in is / being the new root but
|
||||
// /proc/self/cwd being the old root. Since we can play around with the cwd
|
||||
// with pivot_root this allows us to pivot without creating directories in
|
||||
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
|
||||
|
||||
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(oldroot)
|
||||
|
||||
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(newroot)
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := unix.Fchdir(newroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := unix.PivotRoot(".", "."); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
// However, purely for safety, we will fchdir(oldroot) since there isn't
|
||||
// really any guarantee from the kernel what /proc/self/cwd will be after a
|
||||
// pivot_root(2).
|
||||
|
||||
if err := unix.Fchdir(oldroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make oldroot rprivate to make sure our unmounts don't propagate to the
|
||||
// host (and thus bork the machine).
|
||||
if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := unix.Chroot("."); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chdir("/")
|
||||
}
|
||||
|
||||
// createIfNotExists creates a file or a directory only if it does not already
// exist. For a directory the whole path is created; for a file the parent
// directories are created first and then an empty file. If stat fails for any
// reason other than "does not exist", nothing is done and nil is returned.
func createIfNotExists(path string, isDir bool) error {
	_, statErr := os.Stat(path)
	if statErr == nil || !os.IsNotExist(statErr) {
		return nil
	}

	if isDir {
		return os.MkdirAll(path, 0755)
	}

	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return err
	}
	created, err := os.OpenFile(path, os.O_CREATE, 0755)
	if err != nil {
		return err
	}
	created.Close()
	return nil
}
|
||||
|
||||
// readonlyPath will make a path read only.
|
||||
func readonlyPath(path string) error {
|
||||
if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
// remountReadonly will remount an existing mount point and ensure that it is read-only.
|
||||
func remountReadonly(m *configs.Mount) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
flags = m.Flags
|
||||
)
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil {
|
||||
switch err {
|
||||
case unix.EBUSY:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
|
||||
}
|
||||
|
||||
// maskPath masks the top of the specified path inside a container to avoid
|
||||
// security issues from processes reading information from non-namespace aware
|
||||
// mounts ( proc/kcore ).
|
||||
// For files, maskPath bind mounts /dev/null over the top of the specified path.
|
||||
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
|
||||
func maskPath(path string) error {
|
||||
if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
if err == unix.ENOTDIR {
|
||||
return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "")
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeSystemProperty writes the value to a path under /proc/sys as determined
// from the key: every "." in the key becomes a path separator.
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
	segments := strings.Split(key, ".")
	target := path.Join("/proc/sys", strings.Join(segments, "/"))
	return ioutil.WriteFile(target, []byte(value), 0644)
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
)
|
||||
if libcontainerUtils.CleanPath(dest) == "/dev" {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
}
|
||||
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
76
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
Normal file
76
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var operators = map[string]configs.Operator{
|
||||
"SCMP_CMP_NE": configs.NotEqualTo,
|
||||
"SCMP_CMP_LT": configs.LessThan,
|
||||
"SCMP_CMP_LE": configs.LessThanOrEqualTo,
|
||||
"SCMP_CMP_EQ": configs.EqualTo,
|
||||
"SCMP_CMP_GE": configs.GreaterThanOrEqualTo,
|
||||
"SCMP_CMP_GT": configs.GreaterThan,
|
||||
"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
|
||||
}
|
||||
|
||||
var actions = map[string]configs.Action{
|
||||
"SCMP_ACT_KILL": configs.Kill,
|
||||
"SCMP_ACT_ERRNO": configs.Errno,
|
||||
"SCMP_ACT_TRAP": configs.Trap,
|
||||
"SCMP_ACT_ALLOW": configs.Allow,
|
||||
"SCMP_ACT_TRACE": configs.Trace,
|
||||
}
|
||||
|
||||
// archs maps the architecture names accepted by ConvertStringToArch to the
// short architecture strings it returns.
var archs = map[string]string{
	// x86 family
	"SCMP_ARCH_X86":    "x86",
	"SCMP_ARCH_X86_64": "amd64",
	"SCMP_ARCH_X32":    "x32",
	// ARM family
	"SCMP_ARCH_ARM":     "arm",
	"SCMP_ARCH_AARCH64": "arm64",
	// MIPS family
	"SCMP_ARCH_MIPS":        "mips",
	"SCMP_ARCH_MIPS64":      "mips64",
	"SCMP_ARCH_MIPS64N32":   "mips64n32",
	"SCMP_ARCH_MIPSEL":      "mipsel",
	"SCMP_ARCH_MIPSEL64":    "mipsel64",
	"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
	// PowerPC family
	"SCMP_ARCH_PPC":     "ppc",
	"SCMP_ARCH_PPC64":   "ppc64",
	"SCMP_ARCH_PPC64LE": "ppc64le",
	// s390 family
	"SCMP_ARCH_S390":  "s390",
	"SCMP_ARCH_S390X": "s390x",
}
|
||||
|
||||
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
|
||||
// Comparison operators use the names they are assigned by Libseccomp's header.
|
||||
// Attempting to convert a string that is not a valid operator results in an
|
||||
// error.
|
||||
func ConvertStringToOperator(in string) (configs.Operator, error) {
|
||||
if op, ok := operators[in]; ok == true {
|
||||
return op, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToAction converts a string into a Seccomp rule match action.
|
||||
// Actions use the names they are assigned in Libseccomp's header, though some
|
||||
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
|
||||
// return errors.
|
||||
// Attempting to convert a string that is not a valid action results in an
|
||||
// error.
|
||||
func ConvertStringToAction(in string) (configs.Action, error) {
|
||||
if act, ok := actions[in]; ok == true {
|
||||
return act, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToArch converts a string into a Seccomp comparison arch.
|
||||
func ConvertStringToArch(in string) (string, error) {
|
||||
if arch, ok := archs[in]; ok == true {
|
||||
return arch, nil
|
||||
}
|
||||
return "", fmt.Errorf("string %s is not a valid arch for seccomp", in)
|
||||
}
|
227
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
Normal file
227
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,227 @@
|
|||
// +build linux,cgo,seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
actAllow = libseccomp.ActAllow
|
||||
actTrap = libseccomp.ActTrap
|
||||
actKill = libseccomp.ActKill
|
||||
actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
|
||||
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
|
||||
)
|
||||
|
||||
// Filters given syscalls in a container, preventing them from being used
|
||||
// Started in the container init process, and carried over to all child processes
|
||||
// Setns calls, however, require a separate invocation, as they are not children
|
||||
// of the init until they join the namespace
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config == nil {
|
||||
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error initializing seccomp - invalid default action")
|
||||
}
|
||||
|
||||
filter, err := libseccomp.NewFilter(defaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating filter: %s", err)
|
||||
}
|
||||
|
||||
// Add extra architectures
|
||||
for _, arch := range config.Architectures {
|
||||
scmpArch, err := libseccomp.GetArchFromString(arch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := filter.AddArch(scmpArch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Unset no new privs bit
|
||||
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
||||
return fmt.Errorf("error setting no new privileges: %s", err)
|
||||
}
|
||||
|
||||
// Add a rule for each syscall
|
||||
for _, call := range config.Syscalls {
|
||||
if call == nil {
|
||||
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
|
||||
}
|
||||
|
||||
if err = matchCall(filter, call); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err = filter.Load(); err != nil {
|
||||
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns if the kernel has been configured to support seccomp.
|
||||
func IsEnabled() bool {
|
||||
// Try to read from /proc/self/status for kernels > 3.8
|
||||
s, err := parseStatusFile("/proc/self/status")
|
||||
if err != nil {
|
||||
// Check if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
|
||||
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
|
||||
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
_, ok := s["Seccomp"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
case configs.Kill:
|
||||
return actKill, nil
|
||||
case configs.Errno:
|
||||
return actErrno, nil
|
||||
case configs.Trap:
|
||||
return actTrap, nil
|
||||
case configs.Allow:
|
||||
return actAllow, nil
|
||||
case configs.Trace:
|
||||
return actTrace, nil
|
||||
default:
|
||||
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
|
||||
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
|
||||
switch op {
|
||||
case configs.EqualTo:
|
||||
return libseccomp.CompareEqual, nil
|
||||
case configs.NotEqualTo:
|
||||
return libseccomp.CompareNotEqual, nil
|
||||
case configs.GreaterThan:
|
||||
return libseccomp.CompareGreater, nil
|
||||
case configs.GreaterThanOrEqualTo:
|
||||
return libseccomp.CompareGreaterEqual, nil
|
||||
case configs.LessThan:
|
||||
return libseccomp.CompareLess, nil
|
||||
case configs.LessThanOrEqualTo:
|
||||
return libseccomp.CompareLessOrEqual, nil
|
||||
case configs.MaskEqualTo:
|
||||
return libseccomp.CompareMaskedEqual, nil
|
||||
default:
|
||||
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Arg to Libseccomp ScmpCondition
|
||||
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||
cond := libseccomp.ScmpCondition{}
|
||||
|
||||
if arg == nil {
|
||||
return cond, fmt.Errorf("cannot convert nil to syscall condition")
|
||||
}
|
||||
|
||||
op, err := getOperator(arg.Op)
|
||||
if err != nil {
|
||||
return cond, err
|
||||
}
|
||||
|
||||
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
|
||||
}
|
||||
|
||||
// Add a rule to match a single syscall
|
||||
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
if call == nil || filter == nil {
|
||||
return fmt.Errorf("cannot use nil as syscall to block")
|
||||
}
|
||||
|
||||
if len(call.Name) == 0 {
|
||||
return fmt.Errorf("empty string is not a valid syscall")
|
||||
}
|
||||
|
||||
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||
// Ignore it, don't error out
|
||||
callNum, err := libseccomp.GetSyscallFromName(call.Name)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert the call's action to the libseccomp equivalent
|
||||
callAct, err := getAction(call.Action)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Unconditional match - just add the rule
|
||||
if len(call.Args) == 0 {
|
||||
if err = filter.AddRule(callNum, callAct); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Conditional match - convert the per-arg rules into library format
|
||||
conditions := []libseccomp.ScmpCondition{}
|
||||
|
||||
for _, cond := range call.Args {
|
||||
newCond, err := getCondition(cond)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
conditions = append(conditions, newCond)
|
||||
}
|
||||
|
||||
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseStatusFile reads a "key: value" formatted file such as
// /proc/self/status and returns its entries as a map.
//
// Each line is split at its first ':'. The key is the text before it and the
// value is everything after it, unmodified (leading whitespace is kept and
// later ':' characters stay part of the value). Lines without a ':' are
// skipped. An error is returned if the file cannot be opened or read.
func parseStatusFile(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	status := make(map[string]string)

	for s.Scan() {
		// Split only once so a value containing ':' is not truncated.
		parts := strings.SplitN(s.Text(), ":", 2)
		if len(parts) < 2 {
			continue
		}
		status[parts[0]] = parts[1]
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	return status, nil
}
|
24
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
Normal file
24
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
// +build !linux !cgo !seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// ErrSeccompNotEnabled is returned by InitSeccomp when a seccomp config is
// supplied to a build that has no seccomp support.
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
|
||||
|
||||
// InitSeccomp does nothing because seccomp is not supported.
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config != nil {
|
||||
return ErrSeccompNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled always reports false: seccomp is not supported in this build.
func IsEnabled() bool {
	return false
}
|
11
vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go
generated
vendored
Normal file
11
vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
// +build linux,go1.5
|
||||
|
||||
package libcontainer
|
||||
|
||||
import "syscall"
|
||||
|
||||
// enableSetgroups sets the GidMappingsEnableSetgroups member to true, so the
// process's setgroups proc entry won't be set to 'deny' if GidMappings are set.
func enableSetgroups(sys *syscall.SysProcAttr) {
	sys.GidMappingsEnableSetgroups = true
}
|
65
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
Normal file
65
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
	// pipe is not read or written by the methods visible in this file.
	pipe *os.File
	// consoleSocket is handed to setupConsole when config.CreateConsole is set.
	consoleSocket *os.File
	// config carries the settings Init acts on (keyring, console, seccomp,
	// AppArmor profile, process label, Args, ...).
	config *initConfig
}
|
||||
|
||||
func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
|
||||
}
|
||||
|
||||
// Init runs the setns initialization sequence and finishes by calling
// system.Execv with the configured arguments. The steps run in this order,
// and the first error aborts the sequence and is returned unchanged:
//
//  1. join a new session keyring (skipped when NoNewKeyring is set)
//  2. set up the console and controlling terminal (only when CreateConsole is set)
//  3. set PR_SET_NO_NEW_PRIVS (only when NoNewPrivileges is set)
//  4. load the seccomp configuration (only when one is configured)
//  5. finalize the namespace
//  6. apply the AppArmor profile and the process label
//  7. exec l.config.Args[0] with the current environment
func (l *linuxSetnsInit) Init() error {
	if !l.config.Config.NoNewKeyring {
		// do not inherit the parent's session keyring
		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
			return err
		}
	}
	if l.config.CreateConsole {
		if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
			return err
		}
		if err := system.Setctty(); err != nil {
			return err
		}
	}
	if l.config.NoNewPrivileges {
		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return err
		}
	}
	if l.config.Config.Seccomp != nil {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return err
		}
	}
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return err
	}
	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
		return err
	}
	// NOTE(review): Args[0] is indexed without a length check; presumably the
	// caller guarantees at least one argument — confirm.
	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}
|
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
package stacktrace
|
||||
|
||||
import "runtime"
|
||||
|
||||
// Capture captures a stacktrace for the current calling go program
|
||||
//
|
||||
// skip is the number of frames to skip
|
||||
func Capture(userSkip int) Stacktrace {
|
||||
var (
|
||||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
prevPc uintptr
|
||||
)
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
//detect if caller is repeated to avoid loop, gccgo
|
||||
//currently runs into a loop without this check
|
||||
if !ok || pc == prevPc {
|
||||
break
|
||||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
prevPc = pc
|
||||
}
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
}
|
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
Normal file
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
package stacktrace
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NewFrame returns a new stack frame for the provided information
|
||||
func NewFrame(pc uintptr, file string, line int) Frame {
|
||||
fn := runtime.FuncForPC(pc)
|
||||
if fn == nil {
|
||||
return Frame{}
|
||||
}
|
||||
pack, name := parseFunctionName(fn.Name())
|
||||
return Frame{
|
||||
Line: line,
|
||||
File: filepath.Base(file),
|
||||
Package: pack,
|
||||
Function: name,
|
||||
}
|
||||
}
|
||||
|
||||
// parseFunctionName splits a qualified function name at its last "." and
// returns the part before it (the package portion) and the part after it (the
// function portion). A name without any "." is returned as the function with
// an empty package.
func parseFunctionName(name string) (string, string) {
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		return name[:dot], name[dot+1:]
	}
	return "", name
}
|
||||
|
||||
// Frame contains all the information for a stack frame within a go program
type Frame struct {
	// File is the source file name; NewFrame stores only its base name.
	File string
	// Function is the part of the function name after the last ".".
	Function string
	// Package is the part of the function name before the last ".".
	Package string
	// Line is the line number within File.
	Line int
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
package stacktrace
|
||||
|
||||
// Stacktrace is a captured call stack, stored as a list of frames.
type Stacktrace struct {
	// Frames holds the captured stack frames.
	Frames []Frame
}
|
188
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
Normal file
188
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall" //only for Exec
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// linuxStandardInit holds the state needed by Init to perform the standard
// initialization of a container's first process.
type linuxStandardInit struct {
	// pipe is passed to prepareRootfs and syncParentReady, and is closed by
	// Init to signal that initialization has completed.
	pipe *os.File
	// consoleSocket is handed to setupConsole when config.CreateConsole is set.
	consoleSocket *os.File
	// parentPid is compared against the current parent pid just before exec,
	// to detect that the original parent has gone away.
	parentPid int
	// stateDirFD is the directory fd the exec fifo is opened relative to; Init
	// closes it right before exec.
	stateDirFD int
	// config carries the settings Init acts on.
	config *initConfig
}
|
||||
|
||||
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
||||
var newperms uint32
|
||||
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
// with user ns we need 'other' search permissions
|
||||
newperms = 0x8
|
||||
} else {
|
||||
// without user ns we need 'UID' search permissions
|
||||
newperms = 0x80000
|
||||
}
|
||||
|
||||
// create a unique per session container name that we can
|
||||
// join in setns; however, other containers can also join it
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
|
||||
}
|
||||
|
||||
// Init performs the standard initialization of the container's first process
// and finishes by exec'ing the user's command. The steps are strictly
// ordered; the first failing step aborts the sequence and its error is
// returned. In outline:
//
//   - join a new session keyring and make it searchable (unless NoNewKeyring)
//   - set up network and routes, initialize labeling
//   - prepare the rootfs, set up the console, finalize the rootfs
//   - set hostname, AppArmor profile, process label, sysctls, read-only and
//     masked paths
//   - set no_new_privs if requested, tell the parent we are ready, and load
//     seccomp now when no_new_privs is not set
//   - finalize the namespace, restore the parent death signal, verify the
//     parent did not change
//   - resolve the command, close the pipe, block on the exec fifo, load
//     seccomp now when no_new_privs is set, and exec
//
// It returns nil only if syscall.Exec itself returns nil.
func (l *linuxStandardInit) Init() error {
	if !l.config.Config.NoNewKeyring {
		ringname, keepperms, newperms := l.getSessionRingParams()

		// do not inherit the parent's session keyring
		sessKeyId, err := keys.JoinSessionKeyring(ringname)
		if err != nil {
			return err
		}
		// make session keyring searchable
		if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
			return err
		}
	}

	if err := setupNetwork(l.config); err != nil {
		return err
	}
	if err := setupRoute(l.config.Config); err != nil {
		return err
	}

	label.Init()

	// prepareRootfs() can be executed only for a new mount namespace.
	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
		if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
			return err
		}
	}

	// Set up the console. This has to be done *before* we finalize the rootfs,
	// but *after* we've given the user the chance to set up all of the mounts
	// they wanted.
	if l.config.CreateConsole {
		if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
			return err
		}
		if err := system.Setctty(); err != nil {
			return err
		}
	}

	// Finish the rootfs setup.
	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
		if err := finalizeRootfs(l.config.Config); err != nil {
			return err
		}
	}

	if hostname := l.config.Config.Hostname; hostname != "" {
		if err := unix.Sethostname([]byte(hostname)); err != nil {
			return err
		}
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return err
	}
	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
		return err
	}

	for key, value := range l.config.Config.Sysctl {
		if err := writeSystemProperty(key, value); err != nil {
			return err
		}
	}
	for _, path := range l.config.Config.ReadonlyPaths {
		if err := readonlyPath(path); err != nil {
			return err
		}
	}
	for _, path := range l.config.Config.MaskPaths {
		if err := maskPath(path); err != nil {
			return err
		}
	}
	// Remember the parent death signal now; it is restored after
	// finalizeNamespace below.
	pdeath, err := system.GetParentDeathSignal()
	if err != nil {
		return err
	}
	if l.config.NoNewPrivileges {
		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return err
		}
	}
	// Tell our parent that we're ready to Execv. This must be done before the
	// Seccomp rules have been applied, because we need to be able to read and
	// write to a socket.
	if err := syncParentReady(l.pipe); err != nil {
		return err
	}
	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
	// do this before dropping capabilities; otherwise do it as late as possible
	// just before execve so as few syscalls take place after it as possible.
	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return err
		}
	}
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	// finalizeNamespace can change user/group which clears the parent death
	// signal, so we restore it here.
	if err := pdeath.Restore(); err != nil {
		return err
	}
	// compare the parent from the initial start of the init process and make sure that it did not change.
	// if the parent changes that means it died and we were reparented to something else so we should
	// just kill ourself and not cause problems for someone else.
	if unix.Getppid() != l.parentPid {
		return unix.Kill(unix.Getpid(), unix.SIGKILL)
	}
	// check for the arg before waiting to make sure it exists and it is returned
	// as a create time error.
	name, err := exec.LookPath(l.config.Args[0])
	if err != nil {
		return err
	}
	// close the pipe to signal that we have completed our init.
	// The error from Close is not checked.
	l.pipe.Close()
	// wait for the fifo to be opened on the other side before
	// exec'ing the users process.
	fd, err := unix.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return newSystemErrorWithCause(err, "openat exec fifo")
	}
	if _, err := unix.Write(fd, []byte("0")); err != nil {
		return newSystemErrorWithCause(err, "write 0 exec fifo")
	}
	// With NoNewPrivileges set, seccomp was deferred above and is loaded here,
	// as late as possible before the exec.
	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return newSystemErrorWithCause(err, "init seccomp")
		}
	}
	// close the statedir fd before exec because the kernel resets dumpable in the wrong order
	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
	unix.Close(l.stateDirFD)
	if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
		return newSystemErrorWithCause(err, "exec user process")
	}
	return nil
}
|
248
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
Normal file
248
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,248 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func newStateTransitionError(from, to containerState) error {
|
||||
return &stateTransitionError{
|
||||
From: from.status().String(),
|
||||
To: to.status().String(),
|
||||
}
|
||||
}
|
||||
|
||||
// stateTransitionError reports an attempt to move a container between two
// states when that transition is not permitted.
type stateTransitionError struct {
	// From is the status the container was in.
	From string
	// To is the status that was requested.
	To string
}

// Error implements the error interface.
func (e *stateTransitionError) Error() string {
	const format = "invalid state transition from %s to %s"
	return fmt.Sprintf(format, e.From, e.To)
}
|
||||
|
||||
// containerState is implemented by each lifecycle state a linuxContainer can
// be in (stopped, running, created, paused, restored, loaded).
type containerState interface {
	// transition attempts to move the container to the given state and
	// returns an error when that move is not allowed from this state.
	transition(containerState) error
	// destroy tears the container down, if this state permits it.
	destroy() error
	// status reports the Status value this state corresponds to.
	status() Status
}
|
||||
|
||||
// destroy tears down container c and leaves it in the stopped state.
//
// Every step is attempted even if an earlier one failed; the error returned
// is the first non-nil one among destroying the cgroups, removing c.root and
// running the poststop hooks.
func destroy(c *linuxContainer) error {
	// When NEWPID is not among the configured namespaces, SIGKILL every
	// process known to the cgroup manager. A failure here is only logged.
	if !c.config.Namespaces.Contains(configs.NEWPID) {
		if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
			logrus.Warn(err)
		}
	}
	err := c.cgroupManager.Destroy()
	// RemoveAll always runs; its error is kept only if nothing failed before.
	if rerr := os.RemoveAll(c.root); err == nil {
		err = rerr
	}
	c.initProcess = nil
	// The hooks always run; their error is kept only if nothing failed before.
	if herr := runPoststopHooks(c); err == nil {
		err = herr
	}
	c.state = &stoppedState{c: c}
	return err
}
|
||||
|
||||
func runPoststopHooks(c *linuxContainer) error {
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Bundle: utils.SearchLabels(c.config.Labels, "bundle"),
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stoppedState represents a container is a stopped/destroyed state.
|
||||
type stoppedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (b *stoppedState) status() Status {
|
||||
return Stopped
|
||||
}
|
||||
|
||||
func (b *stoppedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *restoredState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *stoppedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(b, s)
|
||||
}
|
||||
|
||||
func (b *stoppedState) destroy() error {
|
||||
return destroy(b.c)
|
||||
}
|
||||
|
||||
// runningState represents a container that is currently running.
|
||||
type runningState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *runningState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
}
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
type createdState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (i *createdState) status() Status {
|
||||
return Created
|
||||
}
|
||||
|
||||
func (i *createdState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *pausedState, *stoppedState:
|
||||
i.c.state = s
|
||||
return nil
|
||||
case *createdState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(i, s)
|
||||
}
|
||||
|
||||
func (i *createdState) destroy() error {
|
||||
i.c.initProcess.signal(unix.SIGKILL)
|
||||
return destroy(i.c)
|
||||
}
|
||||
|
||||
// pausedState represents a container that is currently pause. It cannot be destroyed in a
|
||||
// paused state and must transition back to running first.
|
||||
type pausedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (p *pausedState) status() Status {
|
||||
return Paused
|
||||
}
|
||||
|
||||
func (p *pausedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *stoppedState:
|
||||
p.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(p, s)
|
||||
}
|
||||
|
||||
func (p *pausedState) destroy() error {
|
||||
t, err := p.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t != Running && t != Created {
|
||||
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
}
|
||||
return destroy(p.c)
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
|
||||
}
|
||||
|
||||
// restoredState is the same as the running state but also has associated checkpoint
|
||||
// information that maybe need destroyed when the container is stopped and destroy is called.
|
||||
type restoredState struct {
|
||||
imageDir string
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *restoredState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *restoredState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState, *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *restoredState) destroy() error {
|
||||
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// loadedState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type loadedState struct {
	c *linuxContainer
	// s is the status value returned by status().
	s Status
}

// status reports the status stored in the state.
func (n *loadedState) status() Status {
	return n.s
}

// transition accepts any target state unconditionally.
func (n *loadedState) transition(s containerState) error {
	n.c.state = s
	return nil
}

// destroy refreshes the container's state first and then delegates to the
// destroy method of whatever state the container holds afterwards.
func (n *loadedState) destroy() error {
	if err := n.c.refreshState(); err != nil {
		return err
	}
	return n.c.state.destroy()
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/stats.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/stats.go
generated
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
package libcontainer
|
||||
|
||||
// NetworkInterface holds the name of a network interface together with its
// counters. The Rx*/Tx* fields are presumably receive/transmit totals for
// bytes, packets, errors and dropped packets — confirm against the code that
// fills them in.
type NetworkInterface struct {
	// Name is the name of the network interface.
	Name string

	RxBytes   uint64
	RxPackets uint64
	RxErrors  uint64
	RxDropped uint64
	TxBytes   uint64
	TxPackets uint64
	TxErrors  uint64
	TxDropped uint64
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
package libcontainer
|
||||
|
||||
// Stats is the FreeBSD variant of the statistics structure; it carries
// network interface statistics only.
type Stats struct {
	// Interfaces holds one entry per network interface.
	Interfaces []*NetworkInterface
}
|
8
vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
generated
vendored
Normal file
8
vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
package libcontainer
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
||||
// Stats is the Linux variant of the statistics structure; it carries network
// interface statistics and cgroup statistics.
type Stats struct {
	// Interfaces holds one entry per network interface.
	Interfaces []*NetworkInterface
	// CgroupStats holds the statistics defined by the cgroups package.
	CgroupStats *cgroups.Stats
}
|
7
vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go
generated
vendored
Normal file
7
vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go
generated
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
package libcontainer
|
||||
|
||||
// Solaris - TODO

// Stats is the Solaris variant of the statistics structure; for now it
// carries network interface statistics only.
type Stats struct {
	// Interfaces holds one entry per network interface.
	Interfaces []*NetworkInterface
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
package libcontainer
|
||||
|
||||
// Stats is the Windows variant of the statistics structure; it carries
// network interface statistics only.
type Stats struct {
	// Interfaces holds one entry per network interface.
	Interfaces []*NetworkInterface
}
|
107
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
Normal file
107
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// syncType is the tag carried by each message on the synchronisation pipe.
type syncType string

// Constants that are used for synchronisation between the parent and child
// during container setup. They come in pairs (with procError being a generic
// response which is followed by a &genericError).
//
// [  child  ] <-> [   parent   ]
//
// procHooks   --> [run hooks]
//             <-- procResume
//
// procConsole -->
//             <-- procConsoleReq
// [send(fd)]  --> [recv(fd)]
//             <-- procConsoleAck
//
// procReady   --> [final setup]
//             <-- procRun
//
// NOTE(review): procConsole, procConsoleReq and procConsoleAck appear in the
// diagram above but are not declared in this const block.
const (
	procError  syncType = "procError"
	procReady  syncType = "procReady"
	procRun    syncType = "procRun"
	procHooks  syncType = "procHooks"
	procResume syncType = "procResume"
)
|
||||
|
||||
// syncT is the JSON message exchanged over the synchronisation pipe. It
// carries only the message type, encoded under the "type" key.
type syncT struct {
	Type syncType `json:"type"`
}
|
||||
|
||||
// writeSync is used to write to a synchronisation pipe. An error is returned
|
||||
// if there was a problem writing the payload.
|
||||
func writeSync(pipe io.Writer, sync syncType) error {
|
||||
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readSync is used to read from a synchronisation pipe. An error is returned
|
||||
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
|
||||
func readSync(pipe io.Reader, expected syncType) error {
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
|
||||
if procSync.Type == procError {
|
||||
var ierr genericError
|
||||
|
||||
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
|
||||
return fmt.Errorf("failed reading error from parent: %v", err)
|
||||
}
|
||||
|
||||
return &ierr
|
||||
}
|
||||
|
||||
if procSync.Type != expected {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseSync runs the given callback function on each syncT received from the
// child. It will return once io.EOF is returned from the given pipe.
//
// A procError message is not passed to fn: the genericError that follows it
// is decoded and returned instead. Any other decode error, and any error
// returned by fn, ends the loop and is returned. A nil return means the pipe
// reached EOF cleanly.
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
	// One decoder is shared by all reads so that buffered input is not lost
	// between messages.
	dec := json.NewDecoder(pipe)
	for {
		var sync syncT
		if err := dec.Decode(&sync); err != nil {
			if err == io.EOF {
				break
			}
			return err
		}

		// We handle this case outside fn for cleanliness reasons.
		var ierr *genericError
		if sync.Type == procError {
			// EOF is tolerated here so that a missing payload reaches the
			// panic below rather than being reported as a decode error.
			if err := dec.Decode(&ierr); err != nil && err != io.EOF {
				return newSystemErrorWithCause(err, "decoding proc error from init")
			}
			if ierr != nil {
				return ierr
			}
			// Programmer error.
			panic("No error following JSON procError payload.")
		}

		if err := fn(&sync); err != nil {
			return err
		}
	}
	return nil
}
|
Loading…
Reference in a new issue