bump runc@b263a43430ac6996a4302b891688544225197294
Signed-off-by: Antonio Murdaca <runcom@redhat.com>
This commit is contained in:
parent
73a0881dbb
commit
c258a2d8f0
386 changed files with 9394 additions and 39467 deletions
399
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
399
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
|
@ -22,6 +22,7 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/criurpc"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
|
@ -30,18 +31,18 @@ import (
|
|||
const stdioFdCount = 3
|
||||
|
||||
type linuxContainer struct {
|
||||
id string
|
||||
root string
|
||||
config *configs.Config
|
||||
cgroupManager cgroups.Manager
|
||||
initPath string
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
state containerState
|
||||
created time.Time
|
||||
id string
|
||||
root string
|
||||
config *configs.Config
|
||||
cgroupManager cgroups.Manager
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
initProcessStartTime string
|
||||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
state containerState
|
||||
created time.Time
|
||||
}
|
||||
|
||||
// State represents a running container's state
|
||||
|
@ -62,7 +63,7 @@ type State struct {
|
|||
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
// Container is a libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
|
@ -84,13 +85,14 @@ type Container interface {
|
|||
// Systemerror - System error.
|
||||
Restore(process *Process, criuOpts *CriuOpts) error
|
||||
|
||||
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
|
||||
// If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses
|
||||
// the execution of any user processes. Asynchronously, when the container finished being paused the
|
||||
// state is changed to PAUSED.
|
||||
// If the Container state is PAUSED, do nothing.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ContainerNotRunning - Container not running or created,
|
||||
// Systemerror - System error.
|
||||
Pause() error
|
||||
|
||||
|
@ -99,7 +101,8 @@ type Container interface {
|
|||
// If the Container state is RUNNING, do nothing.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ContainerNotPaused - Container is not paused,
|
||||
// Systemerror - System error.
|
||||
Resume() error
|
||||
|
||||
|
@ -141,7 +144,7 @@ func (c *linuxContainer) State() (*State, error) {
|
|||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
|
||||
}
|
||||
return pids, nil
|
||||
}
|
||||
|
@ -152,14 +155,14 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
|||
stats = &Stats{}
|
||||
)
|
||||
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
|
||||
return stats, newSystemError(err)
|
||||
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
|
||||
}
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
|
||||
if err != nil {
|
||||
return stats, newSystemError(err)
|
||||
return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
|
||||
}
|
||||
stats.Interfaces = append(stats.Interfaces, istats)
|
||||
}
|
||||
|
@ -170,6 +173,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
|||
func (c *linuxContainer) Set(config configs.Config) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
}
|
||||
c.config = &config
|
||||
return c.cgroupManager.Set(c.config)
|
||||
}
|
||||
|
@ -181,42 +191,89 @@ func (c *linuxContainer) Start(process *Process) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
doInit := status == Destroyed
|
||||
parent, err := c.newParentProcess(process, doInit)
|
||||
return c.start(process, status == Stopped)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Run(process *Process) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return newSystemError(err)
|
||||
return err
|
||||
}
|
||||
if err := c.start(process, status == Stopped); err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return c.exec()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Exec() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.exec()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo for reading")
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := ioutil.ReadAll(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(data) > 0 {
|
||||
os.Remove(path)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process, isInit bool) error {
|
||||
parent, err := c.newParentProcess(process, isInit)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
if err := parent.start(); err != nil {
|
||||
// terminate the process to ensure that it properly is reaped.
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCause(err, "starting container process")
|
||||
}
|
||||
// generate a timestamp indicating when the container was started
|
||||
c.created = time.Now().UTC()
|
||||
|
||||
c.state = &runningState{
|
||||
c: c,
|
||||
}
|
||||
if doInit {
|
||||
if err := c.updateState(parent); err != nil {
|
||||
if isInit {
|
||||
c.state = &createdState{
|
||||
c: c,
|
||||
}
|
||||
state, err := c.updateState(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.initProcessStartTime = state.InitProcessStartTime
|
||||
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Root: c.config.Rootfs,
|
||||
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststart {
|
||||
for i, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -224,9 +281,12 @@ func (c *linuxContainer) Start(process *Process) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Signal(s os.Signal) error {
|
||||
func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
if all {
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCause(err, "signaling init process")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -234,23 +294,32 @@ func (c *linuxContainer) Signal(s os.Signal) error {
|
|||
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !doInit {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe)
|
||||
|
||||
// We only set up rootDir if we're not doing a `runc exec`. The reason for
|
||||
// this is to avoid cases where a racing, unprivileged process inside the
|
||||
// container can get access to the statedir file descriptor (which would
|
||||
// allow for container rootfs escape).
|
||||
rootDir, err := os.Open(c.root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, rootDir)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
cmd := &exec.Cmd{
|
||||
Path: c.initPath,
|
||||
Args: c.initArgs,
|
||||
}
|
||||
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
||||
cmd.Stdin = p.Stdin
|
||||
cmd.Stdout = p.Stdout
|
||||
cmd.Stderr = p.Stderr
|
||||
|
@ -259,7 +328,8 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
|
@ -269,7 +339,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
for _, ns := range c.config.Namespaces {
|
||||
|
@ -278,10 +348,11 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
}
|
||||
}
|
||||
_, sharePidns := nsMaps[configs.NEWPID]
|
||||
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "")
|
||||
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.consoleChan = make(chan *os.File, 1)
|
||||
return &initProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
|
@ -292,6 +363,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
process: p,
|
||||
bootstrapData: data,
|
||||
sharePidns: sharePidns,
|
||||
rootDir: rootDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -299,15 +371,16 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
|||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "getting container's current state")
|
||||
}
|
||||
// for setns process, we dont have to set cloneflags as the process namespaces
|
||||
// for setns process, we don't have to set cloneflags as the process namespaces
|
||||
// will only be set via setns syscall
|
||||
data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath)
|
||||
data, err := c.bootstrapData(0, state.NamespacePaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// TODO: set on container for process management
|
||||
p.consoleChan = make(chan *os.File, 1)
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
|
@ -325,8 +398,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
Args: process.Args,
|
||||
Env: process.Env,
|
||||
User: process.User,
|
||||
AdditionalGroups: process.AdditionalGroups,
|
||||
Cwd: process.Cwd,
|
||||
Console: process.consolePath,
|
||||
Capabilities: process.Capabilities,
|
||||
PassedFilesCount: len(process.ExtraFiles),
|
||||
ContainerId: c.ID(),
|
||||
|
@ -334,6 +407,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
ExecFifoPath: filepath.Join(c.root, execFifoFilename),
|
||||
}
|
||||
if process.NoNewPrivileges != nil {
|
||||
cfg.NoNewPrivileges = *process.NoNewPrivileges
|
||||
|
@ -347,6 +421,17 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
if len(process.Rlimits) > 0 {
|
||||
cfg.Rlimits = process.Rlimits
|
||||
}
|
||||
/*
|
||||
* TODO: This should not be automatically computed. We should implement
|
||||
* this as a field in libcontainer.Process, and then we only dup the
|
||||
* new console over the file descriptors which were not explicitly
|
||||
* set with process.Std{in,out,err}. The reason I've left this as-is
|
||||
* is because the GetConsole() interface is new, there's no need to
|
||||
* polish this interface right now.
|
||||
*/
|
||||
if process.Stdin == nil && process.Stdout == nil && process.Stderr == nil {
|
||||
cfg.CreateConsole = true
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
|
@ -371,15 +456,16 @@ func (c *linuxContainer) Pause() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != Running {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
switch status {
|
||||
case Running, Created:
|
||||
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&pausedState{
|
||||
c: c,
|
||||
})
|
||||
}
|
||||
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&pausedState{
|
||||
c: c,
|
||||
})
|
||||
return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Resume() error {
|
||||
|
@ -408,13 +494,13 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
|
|||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
||||
// check Criu version greater than or equal to min_version
|
||||
func (c *linuxContainer) checkCriuVersion(min_version string) error {
|
||||
// checkCriuVersion checks Criu version greater than or equal to minVersion
|
||||
func (c *linuxContainer) checkCriuVersion(minVersion string) error {
|
||||
var x, y, z, versionReq int
|
||||
|
||||
_, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
_, err := fmt.Sscanf(minVersion, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
_, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6
|
||||
_, err = fmt.Sscanf(minVersion, "Version: %d.%d\n", &x, &y) // 1.6
|
||||
}
|
||||
versionReq = x*10000 + y*100 + z
|
||||
|
||||
|
@ -459,7 +545,7 @@ func (c *linuxContainer) checkCriuVersion(min_version string) error {
|
|||
c.criuVersion = x*10000 + y*100 + z
|
||||
|
||||
if c.criuVersion < versionReq {
|
||||
return fmt.Errorf("CRIU version must be %s or higher", min_version)
|
||||
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -480,6 +566,29 @@ func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount
|
|||
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
|
||||
for _, path := range c.config.MaskPaths {
|
||||
fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
if fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
extMnt := &criurpc.ExtMountMap{
|
||||
Key: proto.String(path),
|
||||
Val: proto.String("/dev/null"),
|
||||
}
|
||||
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
@ -575,6 +684,15 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
}
|
||||
}
|
||||
|
||||
if err := c.addMaskPaths(req); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, node := range c.config.Devices {
|
||||
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
||||
c.addCriuDumpMount(req, m)
|
||||
}
|
||||
|
||||
// Write the FD info to a file in the image directory
|
||||
|
||||
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
|
||||
|
@ -607,6 +725,27 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo
|
|||
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) {
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(iface.HostInterfaceName)
|
||||
veth.IfIn = proto.String(iface.Name)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
break
|
||||
case "loopback":
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, i := range criuOpts.VethPairs {
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(i.HostInterfaceName)
|
||||
veth.IfIn = proto.String(i.ContainerInterfaceName)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
@ -690,23 +829,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
break
|
||||
}
|
||||
}
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(iface.HostInterfaceName)
|
||||
veth.IfIn = proto.String(iface.Name)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
break
|
||||
case "loopback":
|
||||
break
|
||||
}
|
||||
|
||||
if len(c.config.MaskPaths) > 0 {
|
||||
m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"}
|
||||
c.addCriuRestoreMount(req, m)
|
||||
}
|
||||
for _, i := range criuOpts.VethPairs {
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(i.HostInterfaceName)
|
||||
veth.IfIn = proto.String(i.ContainerInterfaceName)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
|
||||
for _, node := range c.config.Devices {
|
||||
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
||||
c.addCriuRestoreMount(req, m)
|
||||
}
|
||||
|
||||
if criuOpts.EmptyNs&syscall.CLONE_NEWNET == 0 {
|
||||
c.restoreNetwork(req, criuOpts)
|
||||
}
|
||||
|
||||
// append optional manage cgroups mode
|
||||
|
@ -950,14 +1085,14 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
|||
case notify.GetScript() == "setup-namespaces":
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: int(notify.GetPid()),
|
||||
Root: c.config.Rootfs,
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: int(notify.GetPid()),
|
||||
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Prestart {
|
||||
for i, hook := range c.config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -974,7 +1109,9 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
|||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.updateState(r); err != nil {
|
||||
// create a timestamp indicating when the restored checkpoint was started
|
||||
c.created = time.Now().UTC()
|
||||
if _, err := c.updateState(r); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
|
||||
|
@ -986,13 +1123,17 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) updateState(process parentProcess) error {
|
||||
func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
|
||||
c.initProcess = process
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
return c.saveState(state)
|
||||
err = c.saveState(state)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) error {
|
||||
|
@ -1027,37 +1168,75 @@ func (c *linuxContainer) refreshState() error {
|
|||
if paused {
|
||||
return c.state.transition(&pausedState{c: c})
|
||||
}
|
||||
running, err := c.isRunning()
|
||||
t, err := c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
switch t {
|
||||
case Created:
|
||||
return c.state.transition(&createdState{c: c})
|
||||
case Running:
|
||||
return c.state.transition(&runningState{c: c})
|
||||
}
|
||||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isRunning() (bool, error) {
|
||||
if c.initProcess == nil {
|
||||
// doesInitProcessExist checks if the init process is still the same process
|
||||
// as the initial one, it could happen that the original process has exited
|
||||
// and a new process has been created with the same pid, in this case, the
|
||||
// container would already be stopped.
|
||||
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
|
||||
startTime, err := system.GetProcessStartTime(initPid)
|
||||
if err != nil {
|
||||
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
|
||||
}
|
||||
if c.initProcessStartTime != startTime {
|
||||
return false, nil
|
||||
}
|
||||
// return Running if the init process is alive
|
||||
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemError(err)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
// return Running if the init process is alive
|
||||
if err := syscall.Kill(pid, 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
// It means the process does not exist anymore, could happen when the
|
||||
// process exited just when we call the function, we should not return
|
||||
// error in this case.
|
||||
return Stopped, nil
|
||||
}
|
||||
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
|
||||
}
|
||||
// check if the process is still the original init process.
|
||||
exist, err := c.doesInitProcessExist(pid)
|
||||
if !exist || err != nil {
|
||||
return Stopped, err
|
||||
}
|
||||
// check if the process that is running is the init process or the user's process.
|
||||
// this is the difference between the container Running and Created.
|
||||
environ, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/environ", pid))
|
||||
if err != nil {
|
||||
return Stopped, newSystemErrorWithCausef(err, "reading /proc/%d/environ", pid)
|
||||
}
|
||||
check := []byte("_LIBCONTAINER")
|
||||
if bytes.Contains(environ, check) {
|
||||
return Created, nil
|
||||
}
|
||||
return Running, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state"))
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemError(err)
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
|
||||
}
|
||||
|
@ -1106,16 +1285,22 @@ func (c *linuxContainer) currentState() (*State, error) {
|
|||
// can setns in order.
|
||||
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
||||
paths := []string{}
|
||||
nsTypes := []configs.NamespaceType{
|
||||
order := []configs.NamespaceType{
|
||||
// The user namespace *must* be done first.
|
||||
configs.NEWUSER,
|
||||
configs.NEWIPC,
|
||||
configs.NEWUTS,
|
||||
configs.NEWNET,
|
||||
configs.NEWPID,
|
||||
configs.NEWNS,
|
||||
}
|
||||
// join userns if the init process explicitly requires NEWUSER
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
nsTypes = append(nsTypes, configs.NEWUSER)
|
||||
|
||||
// Remove namespaces that we don't need to join.
|
||||
var nsTypes []configs.NamespaceType
|
||||
for _, ns := range order {
|
||||
if c.config.Namespaces.Contains(ns) {
|
||||
nsTypes = append(nsTypes, ns)
|
||||
}
|
||||
}
|
||||
for _, nsType := range nsTypes {
|
||||
if p, ok := namespaces[nsType]; ok && p != "" {
|
||||
|
@ -1125,14 +1310,14 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
|
|||
}
|
||||
// only set to join this namespace if it exists
|
||||
if _, err := os.Lstat(p); err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
|
||||
}
|
||||
// do not allow namespace path with comma as we use it to separate
|
||||
// the namespace paths
|
||||
if strings.ContainsRune(p, ',') {
|
||||
return nil, newSystemError(fmt.Errorf("invalid path %s", p))
|
||||
}
|
||||
paths = append(paths, p)
|
||||
paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(nsType), p))
|
||||
}
|
||||
}
|
||||
return paths, nil
|
||||
|
@ -1155,7 +1340,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
|
|||
// such as one that uses nsenter package to bootstrap the container's
|
||||
// init process correctly, i.e. with correct namespaces, uid/gid
|
||||
// mapping etc.
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) {
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
|
||||
// create the netlink message
|
||||
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
||||
|
||||
|
@ -1165,14 +1350,6 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
|||
Value: uint32(cloneFlags),
|
||||
})
|
||||
|
||||
// write console path
|
||||
if consolePath != "" {
|
||||
r.AddData(&Bytemsg{
|
||||
Type: ConsolePathAttr,
|
||||
Value: []byte(consolePath),
|
||||
})
|
||||
}
|
||||
|
||||
// write custom namespace paths
|
||||
if len(nsMaps) > 0 {
|
||||
nsPaths, err := c.orderNamespacePaths(nsMaps)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue