Delete shim
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
21a53c1d70
commit
e31a99c08a
23 changed files with 0 additions and 1608 deletions
|
@ -1,56 +0,0 @@
|
|||
// +build !solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// NewConsole returns an initialized console that can be used within a container by copying bytes
|
||||
// from the master side to the slave that is attached as the tty for the container's init process.
|
||||
func newConsole(uid, gid int) (*os.File, string, error) {
|
||||
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
console, err := ptsname(master)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if err := unlockpt(master); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if err := os.Chmod(console, 0600); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if err := os.Chown(console, uid, gid); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
return master, console, nil
|
||||
}
|
||||
|
||||
// ioctl issues the ioctl system call on fd with the given request flag and
// argument. It returns nil on success and the errno reported by the kernel
// otherwise.
func ioctl(fd uintptr, flag, data uintptr) error {
	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data)
	if errno == 0 {
		return nil
	}
	return errno
}
|
||||
|
||||
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
|
||||
// unlockpt should be called before opening the slave side of a pty.
|
||||
func unlockpt(f *os.File) error {
|
||||
var u int32
|
||||
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
|
||||
}
|
||||
|
||||
// ptsname retrieves the name of the first available pts for the given master.
|
||||
func ptsname(f *os.File) (string, error) {
|
||||
var n int32
|
||||
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return fmt.Sprintf("/dev/pts/%d", n), nil
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
// +build solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
)
|
||||
|
||||
// newConsole is the Solaris placeholder for console allocation. Consoles are
// not implemented on this platform, so it always returns a nil file, an empty
// path and an error; uid and gid are ignored.
func newConsole(uid, gid int) (*os.File, string, error) {
	err := errors.New("newConsole not implemented on Solaris")
	return nil, "", err
}
|
|
@ -1,169 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/containerd/sys"
|
||||
"github.com/docker/docker/pkg/term"
|
||||
)
|
||||
|
||||
// writeMessage writes one JSON object of the form
// {"level": "<level>","msg": "<err>"} to f.
//
// Both values are escaped so that quotes, backslashes and control characters
// in the error text (for example a newline or a quoted path) still yield a
// valid JSON document; for messages without such characters the output is
// byte-identical to plain interpolation. A nil err is rendered as "<nil>".
// Write errors are ignored: the log file is the only reporting channel the
// shim has.
func writeMessage(f *os.File, level string, err error) {
	fmt.Fprintf(f, `{"level": "%s","msg": "%s"}`, jsonEscape(level), jsonEscape(fmt.Sprint(err)))
}

// jsonEscape returns s with the characters that are illegal inside a JSON
// string literal (double quote, backslash, and control characters below
// 0x20) replaced by their escape sequences.
func jsonEscape(s string) string {
	out := make([]byte, 0, len(s))
	for _, r := range s {
		switch {
		case r == '"' || r == '\\':
			out = append(out, '\\', byte(r))
		case r < 0x20:
			out = append(out, fmt.Sprintf(`\u%04x`, r)...)
		default:
			out = append(out, string(r)...)
		}
	}
	return string(out)
}
|
||||
|
||||
// controlMessage is one request read from the shim's control fifo as three
// space-separated integers.
type controlMessage struct {
	// Type selects the action in the shim's control loop: 0 closes stdin,
	// 1 resizes the console, 2 sends a signal.
	Type int
	// Width is the console width for a resize, or the signal number for a
	// signal request.
	Width int
	// Height is the console height for a resize.
	Height int
}
|
||||
|
||||
// containerd-shim is a small shim that sits in front of a runtime implementation
|
||||
// that allows it to be repartented to init and handle reattach from the caller.
|
||||
//
|
||||
// the cwd of the shim should be the path to the state directory where the shim
|
||||
// can locate fifos and other information.
|
||||
// Arg0: id of the container
|
||||
// Arg1: bundle path
|
||||
// Arg2: runtime binary
|
||||
func main() {
|
||||
flag.Parse()
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
f, err := os.OpenFile(filepath.Join(cwd, "shim-log.json"), os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := start(f); err != nil {
|
||||
// this means that the runtime failed starting the container and will have the
|
||||
// proper error messages in the runtime log so we should to treat this as a
|
||||
// shim failure because the sim executed properly
|
||||
if err == errRuntime {
|
||||
f.Close()
|
||||
return
|
||||
}
|
||||
// log the error instead of writing to stderr because the shim will have
|
||||
// /dev/null as it's stdio because it is supposed to be reparented to system
|
||||
// init and will not have anyone to read from it
|
||||
writeMessage(f, "error", err)
|
||||
f.Close()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func start(log *os.File) error {
|
||||
// start handling signals as soon as possible so that things are properly reaped
|
||||
// or if runtime exits before we hit the handler
|
||||
signals := make(chan os.Signal, 2048)
|
||||
signal.Notify(signals)
|
||||
// set the shim as the subreaper for all orphaned processes created by the container
|
||||
if err := sys.SetSubreaper(1); err != nil {
|
||||
return err
|
||||
}
|
||||
// open the exit pipe
|
||||
f, err := os.OpenFile("exit", syscall.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open exit fifo %s", err)
|
||||
}
|
||||
defer f.Close()
|
||||
control, err := os.OpenFile("control", syscall.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open control fifo %s", err)
|
||||
}
|
||||
defer control.Close()
|
||||
p, err := newProcess(flag.Arg(0), flag.Arg(1), flag.Arg(2))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err := p.Close(); err != nil {
|
||||
writeMessage(log, "warn", fmt.Errorf("close stdio %s", err))
|
||||
}
|
||||
}()
|
||||
if err := p.create(); err != nil {
|
||||
p.delete()
|
||||
return err
|
||||
}
|
||||
msgC := make(chan controlMessage, 32)
|
||||
go func() {
|
||||
for {
|
||||
var m controlMessage
|
||||
if _, err := fmt.Fscanf(control, "%d %d %d\n", &m.Type, &m.Width, &m.Height); err != nil {
|
||||
continue
|
||||
}
|
||||
msgC <- m
|
||||
}
|
||||
}()
|
||||
var exitShim bool
|
||||
for {
|
||||
select {
|
||||
case s := <-signals:
|
||||
switch s {
|
||||
case syscall.SIGCHLD:
|
||||
exits, _ := sys.Reap(false)
|
||||
for _, e := range exits {
|
||||
// check to see if runtime is one of the processes that has exited
|
||||
if e.Pid == p.pid() {
|
||||
exitShim = true
|
||||
writeInt("exitStatus", e.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
// runtime has exited so the shim can also exit
|
||||
if exitShim {
|
||||
// Let containerd take care of calling the runtime
|
||||
// delete.
|
||||
// This is needed to be done first in order to ensure
|
||||
// that the call to Reap does not block until all
|
||||
// children of the container have died if init was not
|
||||
// started in its own PID namespace.
|
||||
f.Close()
|
||||
// Wait for all the childs this process may have
|
||||
// created (needed for exec and init processes when
|
||||
// they join another pid namespace)
|
||||
p.Wait()
|
||||
return nil
|
||||
}
|
||||
case msg := <-msgC:
|
||||
switch msg.Type {
|
||||
case 0:
|
||||
// close stdin
|
||||
if p.stdinCloser != nil {
|
||||
p.stdinCloser.Close()
|
||||
}
|
||||
case 1:
|
||||
// resize
|
||||
if p.console == nil {
|
||||
continue
|
||||
}
|
||||
ws := term.Winsize{
|
||||
Width: uint16(msg.Width),
|
||||
Height: uint16(msg.Height),
|
||||
}
|
||||
term.SetWinsize(p.console.Fd(), &ws)
|
||||
case 2:
|
||||
// signal
|
||||
if err := syscall.Kill(p.pid(), syscall.Signal(msg.Width)); err != nil {
|
||||
writeMessage(log, "warn", fmt.Errorf("signal pid %d: %s", msg.Width, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeInt creates (or truncates) the file at path and writes i to it in
// decimal, with no trailing newline.
//
// It returns the first error from creating, writing or closing the file.
// The close error is reported because a failed close can mean the data never
// reached the file.
func writeInt(path string, i int) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "%d", i); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}
|
|
@ -1,369 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// errRuntime is returned by process.create when the runtime binary started
// but exited with a failure status; main treats it as a non-shim failure.
var errRuntime = errors.New("shim: runtime execution error")
|
||||
|
||||
// checkpoint is the decoded form of a checkpoint's config.json. Its flags
// are translated into runtime restore arguments by process.create.
type checkpoint struct {
	// Created is the time at which the checkpoint was taken.
	Created time.Time `json:"created"`
	// Name is the name of the checkpoint.
	Name string `json:"name"`
	// TCP checkpoints open tcp connections.
	TCP bool `json:"tcp"`
	// UnixSockets persists unix sockets in the checkpoint.
	UnixSockets bool `json:"unixSockets"`
	// Shell persists tty sessions in the checkpoint.
	Shell bool `json:"shell"`
	// Exit exits the container after the checkpoint is finished.
	Exit bool `json:"exit"`
	// EmptyNS tells CRIU not to restore a particular namespace.
	EmptyNS []string `json:"emptyNS,omitempty"`
}
|
||||
|
||||
type processState struct {
|
||||
specs.Process
|
||||
Exec bool `json:"exec"`
|
||||
Stdin string `json:"stdin"`
|
||||
Stdout string `json:"stdout"`
|
||||
Stderr string `json:"stderr"`
|
||||
RuntimeArgs []string `json:"runtimeArgs"`
|
||||
NoPivotRoot bool `json:"noPivotRoot"`
|
||||
CheckpointPath string `json:"checkpoint"`
|
||||
RootUID int `json:"rootUID"`
|
||||
RootGID int `json:"rootGID"`
|
||||
}
|
||||
|
||||
type process struct {
|
||||
sync.WaitGroup
|
||||
id string
|
||||
bundle string
|
||||
stdio *stdio
|
||||
exec bool
|
||||
containerPid int
|
||||
checkpoint *checkpoint
|
||||
checkpointPath string
|
||||
shimIO *IO
|
||||
stdinCloser io.Closer
|
||||
console *os.File
|
||||
consolePath string
|
||||
state *processState
|
||||
runtime string
|
||||
}
|
||||
|
||||
func newProcess(id, bundle, runtimeName string) (*process, error) {
|
||||
p := &process{
|
||||
id: id,
|
||||
bundle: bundle,
|
||||
runtime: runtimeName,
|
||||
}
|
||||
s, err := loadProcess()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load process from json %s", err)
|
||||
}
|
||||
p.state = s
|
||||
if s.CheckpointPath != "" {
|
||||
cpt, err := loadCheckpoint(s.CheckpointPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.checkpoint = cpt
|
||||
p.checkpointPath = s.CheckpointPath
|
||||
}
|
||||
if err := p.openIO(); err != nil {
|
||||
return nil, fmt.Errorf("open IO for container %s", err)
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func loadProcess() (*processState, error) {
|
||||
f, err := os.Open("process.json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var s processState
|
||||
if err := json.NewDecoder(f).Decode(&s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &s, nil
|
||||
}
|
||||
|
||||
func loadCheckpoint(checkpointPath string) (*checkpoint, error) {
|
||||
f, err := os.Open(filepath.Join(checkpointPath, "config.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var cpt checkpoint
|
||||
if err := json.NewDecoder(f).Decode(&cpt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cpt, nil
|
||||
}
|
||||
|
||||
func (p *process) create() error {
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logPath := filepath.Join(cwd, "log.json")
|
||||
args := append([]string{
|
||||
"--log", logPath,
|
||||
"--log-format", "json",
|
||||
}, p.state.RuntimeArgs...)
|
||||
if p.state.Exec {
|
||||
args = append(args, "exec",
|
||||
"-d",
|
||||
"--process", filepath.Join(cwd, "process.json"),
|
||||
"--console", p.consolePath,
|
||||
)
|
||||
} else if p.checkpoint != nil {
|
||||
args = append(args, "restore",
|
||||
"-d",
|
||||
"--image-path", p.checkpointPath,
|
||||
"--work-path", filepath.Join(p.checkpointPath, "criu.work", "restore-"+time.Now().Format(time.RFC3339)),
|
||||
)
|
||||
add := func(flags ...string) {
|
||||
args = append(args, flags...)
|
||||
}
|
||||
if p.checkpoint.Shell {
|
||||
add("--shell-job")
|
||||
}
|
||||
if p.checkpoint.TCP {
|
||||
add("--tcp-established")
|
||||
}
|
||||
if p.checkpoint.UnixSockets {
|
||||
add("--ext-unix-sk")
|
||||
}
|
||||
if p.state.NoPivotRoot {
|
||||
add("--no-pivot")
|
||||
}
|
||||
for _, ns := range p.checkpoint.EmptyNS {
|
||||
add("--empty-ns", ns)
|
||||
}
|
||||
|
||||
} else {
|
||||
args = append(args, "create",
|
||||
"--bundle", p.bundle,
|
||||
"--console", p.consolePath,
|
||||
)
|
||||
if p.state.NoPivotRoot {
|
||||
args = append(args, "--no-pivot")
|
||||
}
|
||||
}
|
||||
args = append(args,
|
||||
"--pid-file", filepath.Join(cwd, "pid"),
|
||||
p.id,
|
||||
)
|
||||
cmd := exec.Command(p.runtime, args...)
|
||||
cmd.Dir = p.bundle
|
||||
cmd.Stdin = p.stdio.stdin
|
||||
cmd.Stdout = p.stdio.stdout
|
||||
cmd.Stderr = p.stdio.stderr
|
||||
// Call out to setPDeathSig to set SysProcAttr as elements are platform specific
|
||||
cmd.SysProcAttr = setPDeathSig()
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
if exErr, ok := err.(*exec.Error); ok {
|
||||
if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist {
|
||||
return fmt.Errorf("%s not installed on system", p.runtime)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
p.stdio.stdout.Close()
|
||||
p.stdio.stderr.Close()
|
||||
if err := cmd.Wait(); err != nil {
|
||||
if _, ok := err.(*exec.ExitError); ok {
|
||||
return errRuntime
|
||||
}
|
||||
return err
|
||||
}
|
||||
data, err := ioutil.ReadFile("pid")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pid, err := strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.containerPid = pid
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *process) pid() int {
|
||||
return p.containerPid
|
||||
}
|
||||
|
||||
func (p *process) delete() error {
|
||||
if !p.state.Exec {
|
||||
cmd := exec.Command(p.runtime, append(p.state.RuntimeArgs, "delete", p.id)...)
|
||||
cmd.SysProcAttr = setPDeathSig()
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v", out, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// openIO opens the pre-created fifo's for use with the container
|
||||
// in RDWR so that they remain open if the other side stops listening
|
||||
func (p *process) openIO() error {
|
||||
p.stdio = &stdio{}
|
||||
var (
|
||||
uid = p.state.RootUID
|
||||
gid = p.state.RootGID
|
||||
)
|
||||
go func() {
|
||||
if stdinCloser, err := os.OpenFile(p.state.Stdin, syscall.O_WRONLY, 0); err == nil {
|
||||
p.stdinCloser = stdinCloser
|
||||
}
|
||||
}()
|
||||
|
||||
if p.state.Terminal {
|
||||
master, console, err := newConsole(uid, gid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.console = master
|
||||
p.consolePath = console
|
||||
stdin, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go io.Copy(master, stdin)
|
||||
stdout, err := os.OpenFile(p.state.Stdout, syscall.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.Add(1)
|
||||
go func() {
|
||||
io.Copy(stdout, master)
|
||||
master.Close()
|
||||
p.Done()
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
i, err := p.initializeIO(uid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.shimIO = i
|
||||
// non-tty
|
||||
for name, dest := range map[string]func(f *os.File){
|
||||
p.state.Stdout: func(f *os.File) {
|
||||
p.Add(1)
|
||||
go func() {
|
||||
io.Copy(f, i.Stdout)
|
||||
p.Done()
|
||||
}()
|
||||
},
|
||||
p.state.Stderr: func(f *os.File) {
|
||||
p.Add(1)
|
||||
go func() {
|
||||
io.Copy(f, i.Stderr)
|
||||
p.Done()
|
||||
}()
|
||||
},
|
||||
} {
|
||||
f, err := os.OpenFile(name, syscall.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dest(f)
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(p.state.Stdin, syscall.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go func() {
|
||||
io.Copy(i.Stdin, f)
|
||||
i.Stdin.Close()
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IO holds the shim-side ends of the three standard streams
// (stdin, stdout, stderr) of the process.
type IO struct {
	// Stdin is written by the shim and read by the process.
	Stdin io.WriteCloser
	// Stdout is read by the shim and written by the process.
	Stdout io.ReadCloser
	// Stderr is read by the shim and written by the process.
	Stderr io.ReadCloser
}
|
||||
|
||||
func (p *process) initializeIO(rootuid int) (i *IO, err error) {
|
||||
var fds []uintptr
|
||||
i = &IO{}
|
||||
// cleanup in case of an error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
for _, fd := range fds {
|
||||
syscall.Close(int(fd))
|
||||
}
|
||||
}
|
||||
}()
|
||||
// STDIN
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.stdio.stdin, i.Stdin = r, w
|
||||
// STDOUT
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.stdio.stdout, i.Stdout = w, r
|
||||
// STDERR
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.stdio.stderr, i.Stderr = w, r
|
||||
// change ownership of the pipes in case we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
func (p *process) Close() error {
|
||||
return p.stdio.Close()
|
||||
}
|
||||
|
||||
// stdio groups the three files that become the runtime command's standard
// streams.
type stdio struct {
	stdin  *os.File
	stdout *os.File
	stderr *os.File
}
|
||||
|
||||
func (s *stdio) Close() error {
|
||||
err := s.stdin.Close()
|
||||
if oerr := s.stdout.Close(); err == nil {
|
||||
err = oerr
|
||||
}
|
||||
if oerr := s.stderr.Close(); err == nil {
|
||||
err = oerr
|
||||
}
|
||||
return err
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
// +build !solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// setPDeathSig returns process attributes whose parent-death signal is
// SIGKILL, so that a command started with them is killed if the shim dies.
func setPDeathSig() *syscall.SysProcAttr {
	attr := new(syscall.SysProcAttr)
	attr.Pdeathsig = syscall.SIGKILL
	return attr
}
|
|
@ -1,12 +0,0 @@
|
|||
// +build solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// setPDeathSig is a no-op on Solaris, where Pdeathsig is not defined; it
// returns a nil attribute set.
func setPDeathSig() *syscall.SysProcAttr {
	var attr *syscall.SysProcAttr
	return attr
}
|
|
@ -1,543 +0,0 @@
|
|||
package shim
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/containerd"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
	// ErrContainerStartTimeout is returned by waitForCreate when no pid was
	// reported before the timeout elapsed.
	ErrContainerStartTimeout = errors.New("shim: container did not start before the specified timeout")
	// ErrContainerNotStarted is returned when the shim is no longer alive and
	// the logs hold no more specific error.
	ErrContainerNotStarted = errors.New("shim: container not started")
	// ErrProcessNotExited is returned by Wait when no exit status has been
	// written yet.
	ErrProcessNotExited = errors.New("containerd: process has not exited")
	// ErrShimExited reports that the shim exited before the container
	// process was started.
	ErrShimExited = errors.New("containerd: shim exited before container process was started")

	// errInvalidPidInt is returned by readPid when the pid file does not
	// hold an integer.
	errInvalidPidInt = errors.New("shim: process pid is invalid")
)
|
||||
|
||||
// UnknownStatus is the exit status reported when the real exit status of a
// process cannot be determined.
const UnknownStatus = 255
|
||||
|
||||
type processOpts struct {
|
||||
root string
|
||||
noPivotRoot bool
|
||||
checkpoint string
|
||||
c *containerd.Container
|
||||
cmd *exec.Cmd
|
||||
exec bool
|
||||
spec specs.Process
|
||||
stdin io.Reader
|
||||
stdout io.Writer
|
||||
stderr io.Writer
|
||||
}
|
||||
|
||||
func newProcess(opts processOpts) (*process, error) {
|
||||
var (
|
||||
spec = opts.c.Spec()
|
||||
stdin, stdout, stderr string
|
||||
)
|
||||
uid, gid, err := getRootIDs(spec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, t := range []struct {
|
||||
path *string
|
||||
v interface{}
|
||||
}{
|
||||
{
|
||||
path: &stdin,
|
||||
v: opts.stdin,
|
||||
},
|
||||
{
|
||||
path: &stdout,
|
||||
v: opts.stdout,
|
||||
},
|
||||
{
|
||||
path: &stderr,
|
||||
v: opts.stderr,
|
||||
},
|
||||
} {
|
||||
p, err := getFifoPath(t.v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*t.path = p
|
||||
}
|
||||
p := &process{
|
||||
root: opts.root,
|
||||
cmd: opts.cmd,
|
||||
done: make(chan struct{}),
|
||||
spec: opts.spec,
|
||||
exec: opts.exec,
|
||||
rootUid: uid,
|
||||
rootGid: gid,
|
||||
noPivotRoot: opts.noPivotRoot,
|
||||
checkpoint: opts.checkpoint,
|
||||
stdin: stdin,
|
||||
stdout: stdout,
|
||||
stderr: stderr,
|
||||
}
|
||||
f, err := os.Create(filepath.Join(opts.root, "process.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = json.NewEncoder(f).Encode(p)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
exit, err := getExitPipe(filepath.Join(opts.root, "exit"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
control, err := getControlPipe(filepath.Join(opts.root, "control"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.exit, p.control = exit, control
|
||||
return p, nil
|
||||
}
|
||||
|
||||
type process struct {
|
||||
root string
|
||||
cmd *exec.Cmd
|
||||
done chan struct{}
|
||||
success bool
|
||||
startTime string
|
||||
mu sync.Mutex
|
||||
pid int
|
||||
exit *os.File
|
||||
control *os.File
|
||||
|
||||
spec specs.Process
|
||||
noPivotRoot bool
|
||||
exec bool
|
||||
rootUid int
|
||||
rootGid int
|
||||
checkpoint string
|
||||
stdin string
|
||||
stdout string
|
||||
stderr string
|
||||
}
|
||||
|
||||
type processState struct {
|
||||
specs.Process
|
||||
Exec bool `json:"exec"`
|
||||
RootUID int `json:"rootUID"`
|
||||
RootGID int `json:"rootGID"`
|
||||
Checkpoint string `json:"checkpoint"`
|
||||
NoPivotRoot bool `json:"noPivotRoot"`
|
||||
RuntimeArgs []string `json:"runtimeArgs"`
|
||||
Root string `json:"root"`
|
||||
StartTime string `json:"startTime"`
|
||||
// Stdin fifo filepath
|
||||
Stdin string `json:"stdin"`
|
||||
// Stdout fifo filepath
|
||||
Stdout string `json:"stdout"`
|
||||
// Stderr fifo filepath
|
||||
Stderr string `json:"stderr"`
|
||||
}
|
||||
|
||||
func (p *process) MarshalJSON() ([]byte, error) {
|
||||
ps := processState{
|
||||
Process: p.spec,
|
||||
NoPivotRoot: p.noPivotRoot,
|
||||
Checkpoint: p.checkpoint,
|
||||
RootUID: p.rootUid,
|
||||
RootGID: p.rootGid,
|
||||
Exec: p.exec,
|
||||
Stdin: p.stdin,
|
||||
Stdout: p.stdout,
|
||||
Stderr: p.stderr,
|
||||
Root: p.root,
|
||||
StartTime: p.startTime,
|
||||
}
|
||||
return json.Marshal(ps)
|
||||
}
|
||||
|
||||
func (p *process) UnmarshalJSON(b []byte) error {
|
||||
var ps processState
|
||||
if err := json.Unmarshal(b, &ps); err != nil {
|
||||
return err
|
||||
}
|
||||
p.spec = ps.Process
|
||||
p.noPivotRoot = ps.NoPivotRoot
|
||||
p.rootGid = ps.RootGID
|
||||
p.rootUid = ps.RootUID
|
||||
p.checkpoint = ps.Checkpoint
|
||||
p.exec = ps.Exec
|
||||
p.stdin = ps.Stdin
|
||||
p.stdout = ps.Stdout
|
||||
p.stderr = ps.Stderr
|
||||
p.root = ps.Root
|
||||
p.startTime = ps.StartTime
|
||||
p.done = make(chan struct{})
|
||||
pid, err := readPid(filepath.Join(p.root, "pid"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.pid = pid
|
||||
exit, err := getExitPipe(filepath.Join(p.root, "exit"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
control, err := getControlPipe(filepath.Join(p.root, "control"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.exit, p.control = exit, control
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *process) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
func (p *process) FD() int {
|
||||
return int(p.exit.Fd())
|
||||
}
|
||||
|
||||
func (p *process) Close() error {
|
||||
return p.exit.Close()
|
||||
}
|
||||
|
||||
func (p *process) Remove() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *process) Wait() (rst uint32, rerr error) {
|
||||
<-p.done
|
||||
data, err := ioutil.ReadFile(filepath.Join(p.root, "exitStatus"))
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
rst, rerr = p.handleSigkilledShim(rst, rerr)
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return UnknownStatus, ErrProcessNotExited
|
||||
}
|
||||
return UnknownStatus, err
|
||||
}
|
||||
if len(data) == 0 {
|
||||
return UnknownStatus, ErrProcessNotExited
|
||||
}
|
||||
i, err := strconv.ParseUint(string(data), 10, 32)
|
||||
return uint32(i), err
|
||||
}
|
||||
|
||||
func (p *process) Signal(s os.Signal) error {
|
||||
_, err := fmt.Fprintf(p.control, "%d %d %d\n", 2, s, 0)
|
||||
return err
|
||||
}
|
||||
|
||||
// same checks if the process is the same process originally launched
|
||||
func (p *process) same() (bool, error) {
|
||||
/// for backwards compat assume true if it is not set
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.startTime == "" {
|
||||
return true, nil
|
||||
}
|
||||
pid, err := readPid(filepath.Join(p.root, "pid"))
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
started, err := readProcessStartTime(pid)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return p.startTime == started, nil
|
||||
}
|
||||
|
||||
func (p *process) checkExited() {
|
||||
err := p.cmd.Wait()
|
||||
if err == nil {
|
||||
p.mu.Lock()
|
||||
if p.success {
|
||||
p.mu.Unlock()
|
||||
return
|
||||
}
|
||||
p.success = true
|
||||
p.mu.Unlock()
|
||||
}
|
||||
if same, _ := p.same(); same && p.hasPid() {
|
||||
// The process changed its PR_SET_PDEATHSIG, so force kill it
|
||||
logrus.Infof("containerd: (pid %v) has become an orphan, killing it", p.pid)
|
||||
if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
|
||||
logrus.Errorf("containerd: unable to SIGKILL (pid %v): %v", p.pid, err)
|
||||
return
|
||||
}
|
||||
// wait for the container process to exit
|
||||
for {
|
||||
if err := unix.Kill(p.pid, 0); err != nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *process) hasPid() bool {
|
||||
p.mu.Lock()
|
||||
r := p.pid > 0
|
||||
p.mu.Unlock()
|
||||
return r
|
||||
}
|
||||
|
||||
// pidResponse is the result readContainerPid sends back: either the pid that
// was read or the error that stopped the attempt.
type pidResponse struct {
	pid int
	err error
}
|
||||
|
||||
func (p *process) waitForCreate(timeout time.Duration) error {
|
||||
r := make(chan pidResponse, 1)
|
||||
go p.readContainerPid(r)
|
||||
|
||||
select {
|
||||
case resp := <-r:
|
||||
if resp.err != nil {
|
||||
return resp.err
|
||||
}
|
||||
p.mu.Lock()
|
||||
p.pid = resp.pid
|
||||
started, err := readProcessStartTime(resp.pid)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// process already exited
|
||||
p.success = true
|
||||
p.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
logrus.Warnf("shim: unable to save starttime: %v", err)
|
||||
}
|
||||
p.startTime = started
|
||||
f, err := os.Create(filepath.Join(p.root, "process.json"))
|
||||
if err != nil {
|
||||
logrus.Warnf("shim: unable to create process.json: %v", err)
|
||||
p.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
defer f.Close()
|
||||
if err := json.NewEncoder(f).Encode(p); err != nil {
|
||||
logrus.Warnf("shim: unable to encode process: %v", err)
|
||||
}
|
||||
p.mu.Unlock()
|
||||
return nil
|
||||
case <-time.After(timeout):
|
||||
p.cmd.Process.Kill()
|
||||
p.cmd.Wait()
|
||||
return ErrContainerStartTimeout
|
||||
}
|
||||
}
|
||||
|
||||
func (p *process) readContainerPid(r chan pidResponse) {
|
||||
pidFile := filepath.Join(p.root, "pid")
|
||||
for {
|
||||
pid, err := readPid(pidFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) || err == errInvalidPidInt {
|
||||
if serr := checkErrorLogs(p.cmd,
|
||||
filepath.Join(p.root, "shim-log.json"),
|
||||
filepath.Join(p.root, "log.json")); serr != nil && !os.IsNotExist(serr) {
|
||||
r <- pidResponse{
|
||||
err: serr,
|
||||
}
|
||||
break
|
||||
}
|
||||
time.Sleep(15 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
r <- pidResponse{
|
||||
err: err,
|
||||
}
|
||||
break
|
||||
}
|
||||
r <- pidResponse{
|
||||
pid: pid,
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func (p *process) handleSigkilledShim(rst uint32, rerr error) (uint32, error) {
|
||||
if err := unix.Kill(p.pid, 0); err == syscall.ESRCH {
|
||||
logrus.Warnf("containerd: (pid %d) does not exist", p.pid)
|
||||
// The process died while containerd was down (probably of
|
||||
// SIGKILL, but no way to be sure)
|
||||
return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus)
|
||||
}
|
||||
|
||||
// If it's not the same process, just mark it stopped and set
|
||||
// the status to the UnknownStatus value (i.e. 255)
|
||||
if same, _ := p.same(); !same {
|
||||
// Create the file so we get the exit event generated once monitor kicks in
|
||||
// without having to go through all this process again
|
||||
return UnknownStatus, writeExitStatus(filepath.Join(p.root, "exitStatus"), UnknownStatus)
|
||||
}
|
||||
ppid, err := readProcStatField(p.pid, 4)
|
||||
if err != nil {
|
||||
return rst, fmt.Errorf("could not check process ppid: %v (%v)", err, rerr)
|
||||
}
|
||||
if ppid == "1" {
|
||||
if err := unix.Kill(p.pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
|
||||
return UnknownStatus, fmt.Errorf(
|
||||
"containerd: unable to SIGKILL (pid %v): %v", p.pid, err)
|
||||
}
|
||||
// wait for the process to die
|
||||
for {
|
||||
if err := unix.Kill(p.pid, 0); err == syscall.ESRCH {
|
||||
break
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
}
|
||||
// Create the file so we get the exit event generated once monitor kicks in
|
||||
// without having to go through all this process again
|
||||
status := 128 + uint32(syscall.SIGKILL)
|
||||
return status, writeExitStatus(filepath.Join(p.root, "exitStatus"), status)
|
||||
}
|
||||
return rst, rerr
|
||||
}
|
||||
|
||||
func checkErrorLogs(cmd *exec.Cmd, shimLogPath, runtimeLogPath string) error {
|
||||
alive, err := isAlive(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !alive {
|
||||
// runc could have failed to run the container so lets get the error
|
||||
// out of the logs or the shim could have encountered an error
|
||||
messages, err := readLogMessages(shimLogPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, m := range messages {
|
||||
if m.Level == "error" {
|
||||
return fmt.Errorf("shim error: %v", m.Msg)
|
||||
}
|
||||
}
|
||||
// no errors reported back from shim, check for runc/runtime errors
|
||||
messages, err = readLogMessages(runtimeLogPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = ErrContainerNotStarted
|
||||
}
|
||||
return err
|
||||
}
|
||||
for _, m := range messages {
|
||||
if m.Level == "error" {
|
||||
return fmt.Errorf("oci runtime error: %v", m.Msg)
|
||||
}
|
||||
}
|
||||
return ErrContainerNotStarted
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readProcessStartTime(pid int) (string, error) {
|
||||
return readProcStatField(pid, 22)
|
||||
}
|
||||
|
||||
// readProcStatField returns the field-th (1-based, as numbered in the proc
// manual) field of /proc/<pid>/stat.
//
// Field 1 is the pid and field 2 the command name without its parentheses.
// An error is returned when the file cannot be read, when it is malformed, or
// when field is out of range.
func readProcStatField(pid int, field int) (string, error) {
	data, err := ioutil.ReadFile(filepath.Join(string(filepath.Separator), "proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return "", err
	}
	return parseProcStatField(string(data), field)
}

// parseProcStatField extracts one field from the content of a stat file.
// The command name is delimited by the first " (" and the LAST ") ", because
// the name itself may contain spaces and parentheses.
func parseProcStatField(stat string, field int) (string, error) {
	open := strings.Index(stat, " (")
	end := strings.LastIndex(stat, ") ")
	if open < 0 || end < open+2 {
		return "", fmt.Errorf("malformed stat data %q", stat)
	}
	switch {
	case field < 1:
		return "", fmt.Errorf("invalid stat field %d", field)
	case field == 1:
		return stat[:open], nil
	case field == 2:
		return stat[open+2 : end], nil
	}
	// Everything after the name is space separated; the first of these
	// entries is field 3.
	rest := strings.Fields(stat[end+2:])
	idx := field - 3
	if idx >= len(rest) {
		return "", fmt.Errorf("stat field %d out of range (%d fields)", field, len(rest)+2)
	}
	return rest[idx], nil
}
|
||||
|
||||
func readPid(pidFile string) (int, error) {
|
||||
data, err := ioutil.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
i, err := strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
return -1, errInvalidPidInt
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// isAlive reports whether the shim process started by cmd still exists.
//
// It first issues a non-blocking wait4 on the pid; when that call succeeds the
// process is reported as alive. Otherwise the pid is probed with signal 0:
// ESRCH means the process is gone, any other failure is returned to the caller.
func isAlive(cmd *exec.Cmd) (bool, error) {
	pid := cmd.Process.Pid
	_, waitErr := syscall.Wait4(pid, nil, syscall.WNOHANG, nil)
	if waitErr == nil {
		return true, nil
	}
	switch killErr := syscall.Kill(pid, 0); killErr {
	case nil:
		return true, nil
	case syscall.ESRCH:
		return false, nil
	default:
		return false, killErr
	}
}
|
||||
|
||||
// message is a single JSON entry of a shim or runtime log file; only the
// fields inspected by this package are decoded.
type message struct {
	// Level is the severity of the entry; checkErrorLogs looks for "error".
	Level string `json:"level"`
	// Msg is the text of the log entry.
	Msg string `json:"msg"`
}
|
||||
|
||||
func readLogMessages(path string) ([]message, error) {
|
||||
var out []message
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
dec := json.NewDecoder(f)
|
||||
for {
|
||||
var m message
|
||||
if err := dec.Decode(&m); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, m)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func getExitPipe(path string) (*os.File, error) {
|
||||
if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
// add NONBLOCK in case the other side has already closed or else
|
||||
// this function would never return
|
||||
return os.OpenFile(path, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
|
||||
}
|
||||
|
||||
func getControlPipe(path string) (*os.File, error) {
|
||||
if err := unix.Mkfifo(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
return os.OpenFile(path, syscall.O_RDWR|syscall.O_NONBLOCK, 0)
|
||||
}
|
||||
|
||||
// writeExitStatus stores status at path as a decimal number, creating or
// truncating the file with mode 0644.
func writeExitStatus(path string, status uint32) error {
	// Go has no %u verb; %d is the decimal verb for unsigned integers too.
	return ioutil.WriteFile(path, []byte(fmt.Sprintf("%d", status)), 0644)
}
|
|
@ -1,430 +0,0 @@
|
|||
package shim
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/containerd"
|
||||
"github.com/docker/containerd/executors/oci"
|
||||
"github.com/docker/containerd/monitor"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
/*
|
||||
├── libcontainerd
|
||||
│ ├── containerd
|
||||
│ │ └── ff2e86955c2be43f0e3c300fbd3786599301bd8efcaa5a386587f132e73af242
|
||||
│ │ ├── init
|
||||
│ │ │ ├── control
|
||||
│ │ │ ├── exit
|
||||
│ │ │ ├── log.json
|
||||
│ │ │ ├── pid
|
||||
│ │ │ ├── process.json
|
||||
│ │ │ ├── shim-log.json
|
||||
│ │ │ └── starttime
|
||||
│ │ └── state.json
|
||||
*/
|
||||
|
||||
var (
	// ErrNotFifo is returned by getFifoPath when the supplied IO value is not
	// an *os.File or has an empty name.
	ErrNotFifo = errors.New("shim: IO is not a valid fifo on disk")

	// errInitProcessNotExist is returned when no "init" process is registered
	// with the shim.
	errInitProcessNotExist = errors.New("shim: init process does not exist")
)
|
||||
|
||||
// Opts holds the settings used by New to build a Shim.
type Opts struct {
	// Name is the shim binary that is executed for every process.
	Name string
	// RuntimeName is the name handed to the OCI runtime executor.
	RuntimeName string
	// RuntimeArgs are the arguments handed to the OCI runtime executor.
	RuntimeArgs []string
	// RuntimeRoot is the root handed to the OCI runtime executor.
	RuntimeRoot string
	// NoPivotRoot is the no-pivot-root option for processes started by the shim.
	NoPivotRoot bool
	// Root is the state directory of the shim; New creates it and writes
	// state.json inside it.
	Root string
	// Timeout bounds how long the shim waits for a process to be created.
	Timeout time.Duration
}
|
||||
|
||||
func New(opts Opts) (*Shim, error) {
|
||||
if err := os.MkdirAll(filepath.Dir(opts.Root), 0711); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Mkdir(opts.Root, 0711); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r, err := oci.New(oci.Opts{
|
||||
Root: opts.RuntimeRoot,
|
||||
Name: opts.RuntimeName,
|
||||
Args: opts.RuntimeArgs,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m, err := monitor.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s := &Shim{
|
||||
root: opts.Root,
|
||||
name: opts.Name,
|
||||
timeout: opts.Timeout,
|
||||
runtime: r,
|
||||
processes: make(map[string]*process),
|
||||
m: m,
|
||||
}
|
||||
go s.startMonitor()
|
||||
f, err := os.Create(filepath.Join(opts.Root, "state.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = json.NewEncoder(f).Encode(s)
|
||||
f.Close()
|
||||
return s, err
|
||||
}
|
||||
|
||||
// Load will load an existing shim with all its information restored from the
|
||||
// provided path
|
||||
func Load(root string) (*Shim, error) {
|
||||
f, err := os.Open(filepath.Join(root, "state.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var s Shim
|
||||
err = json.NewDecoder(f).Decode(&s)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m, err := monitor.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.m = m
|
||||
go s.startMonitor()
|
||||
dirs, err := ioutil.ReadDir(root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, d := range dirs {
|
||||
if !d.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := d.Name()
|
||||
if f, err = os.Open(filepath.Join(root, name, "process.json")); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var p process
|
||||
err = json.NewDecoder(f).Decode(&p)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.processes[name] = &p
|
||||
if err := s.m.Add(&p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &s, nil
|
||||
}
|
||||
|
||||
// Shim is a container runtime that adds a shim process as the container's parent
|
||||
// to hold open stdio and other resources so that higher level daemons can exit and
|
||||
// load running containers for handling upgrades and/or crashes
|
||||
//
|
||||
// The shim uses an OCI compliant runtime as its executor
|
||||
type Shim struct {
|
||||
// root holds runtime state information for the containers
|
||||
// launched by the runtime
|
||||
root string
|
||||
name string
|
||||
timeout time.Duration
|
||||
noPivotRoot bool
|
||||
runtime *oci.OCIRuntime
|
||||
pmu sync.Mutex
|
||||
processes map[string]*process
|
||||
bundle string
|
||||
checkpoint string
|
||||
m *monitor.Monitor
|
||||
}
|
||||
|
||||
// state is the JSON representation of a Shim as persisted in state.json.
type state struct {
	// Root is the state directory of the shim.
	Root string `json:"root"`
	// Bundle is the path to the container's bundle.
	Bundle string `json:"bundle"`
	// Runtime is the OCI runtime binary name.
	Runtime string `json:"runtime"`
	// RuntimeArgs are the OCI runtime arguments.
	RuntimeArgs []string `json:"runtimeArgs"`
	// RuntimeRoot is the root of the OCI runtime.
	RuntimeRoot string `json:"runtimeRoot"`
	// Name is the shim binary name.
	Name string `json:"shim"`
	// NoPivotRoot is the no-pivot-root option.
	NoPivotRoot bool `json:"noPivotRoot"`
	// Timeout is the timeout for container start.
	Timeout time.Duration `json:"timeout"`
}
|
||||
|
||||
func (s *Shim) MarshalJSON() ([]byte, error) {
|
||||
st := state{
|
||||
Name: s.name,
|
||||
Bundle: s.bundle,
|
||||
Runtime: s.runtime.Name(),
|
||||
RuntimeArgs: s.runtime.Args(),
|
||||
RuntimeRoot: s.runtime.Root(),
|
||||
NoPivotRoot: s.noPivotRoot,
|
||||
Timeout: s.timeout,
|
||||
Root: s.root,
|
||||
}
|
||||
return json.Marshal(st)
|
||||
}
|
||||
|
||||
func (s *Shim) UnmarshalJSON(b []byte) error {
|
||||
var st state
|
||||
if err := json.Unmarshal(b, &st); err != nil {
|
||||
return err
|
||||
}
|
||||
s.root = st.Root
|
||||
s.name = st.Name
|
||||
s.bundle = st.Bundle
|
||||
s.timeout = st.Timeout
|
||||
s.noPivotRoot = st.NoPivotRoot
|
||||
r, err := oci.New(oci.Opts{
|
||||
Name: st.Runtime,
|
||||
Args: st.RuntimeArgs,
|
||||
Root: st.RuntimeRoot,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.runtime = r
|
||||
s.processes = make(map[string]*process)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Shim) Create(c *containerd.Container) (containerd.ProcessDelegate, error) {
|
||||
s.bundle = c.Path()
|
||||
var (
|
||||
root = filepath.Join(s.root, "init")
|
||||
cmd = s.command(c.ID(), c.Path(), s.runtime.Name())
|
||||
)
|
||||
if err := os.Mkdir(root, 0711); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// exec the shim inside the state directory setup with the process
|
||||
// information for what is being run
|
||||
cmd.Dir = root
|
||||
// make sure the shim is in a new process group
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setpgid: true,
|
||||
}
|
||||
p, err := s.startCommand(processOpts{
|
||||
spec: c.Spec().Process,
|
||||
root: root,
|
||||
noPivotRoot: s.noPivotRoot,
|
||||
checkpoint: s.checkpoint,
|
||||
c: c,
|
||||
cmd: cmd,
|
||||
stdin: c.Stdin,
|
||||
stdout: c.Stdout,
|
||||
stderr: c.Stderr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.pmu.Lock()
|
||||
s.processes["init"] = p
|
||||
s.pmu.Unlock()
|
||||
f, err := os.Create(filepath.Join(s.root, "state.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = json.NewEncoder(f).Encode(s)
|
||||
f.Close()
|
||||
// ~TODO: oom and stats stuff here
|
||||
return p, err
|
||||
}
|
||||
|
||||
func (s *Shim) Start(c *containerd.Container) error {
|
||||
p, err := s.getContainerInit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var (
|
||||
errC = make(chan error, 1)
|
||||
cmd = s.runtime.Command("start", c.ID())
|
||||
)
|
||||
go func() {
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
errC <- fmt.Errorf("%s: %q", err, out)
|
||||
}
|
||||
errC <- nil
|
||||
}()
|
||||
select {
|
||||
case err := <-errC:
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case <-p.done:
|
||||
if !p.success {
|
||||
if cmd.Process != nil {
|
||||
cmd.Process.Kill()
|
||||
}
|
||||
cmd.Wait()
|
||||
return ErrShimExited
|
||||
}
|
||||
err := <-errC
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Shim) Delete(c *containerd.Container) error {
|
||||
if err := s.runtime.Delete(c); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.RemoveAll(s.root)
|
||||
}
|
||||
|
||||
func (s *Shim) Exec(c *containerd.Container, p *containerd.Process) (containerd.ProcessDelegate, error) {
|
||||
root, err := ioutil.TempDir(s.root, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd := s.command(c.ID(), c.Path(), s.runtime.Name())
|
||||
// exec the shim inside the state directory setup with the process
|
||||
// information for what is being run
|
||||
cmd.Dir = root
|
||||
// make sure the shim is in a new process group
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setpgid: true,
|
||||
}
|
||||
sp, err := s.startCommand(processOpts{
|
||||
exec: true,
|
||||
spec: *p.Spec(),
|
||||
root: root,
|
||||
noPivotRoot: s.noPivotRoot,
|
||||
checkpoint: s.checkpoint,
|
||||
c: c,
|
||||
cmd: cmd,
|
||||
stdin: p.Stdin,
|
||||
stdout: p.Stdout,
|
||||
stderr: p.Stderr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.pmu.Lock()
|
||||
s.processes[filepath.Base(root)] = sp
|
||||
s.pmu.Unlock()
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
func (s *Shim) Load(id string) (containerd.ProcessDelegate, error) {
|
||||
return s.getContainerInit()
|
||||
}
|
||||
|
||||
func (s *Shim) getContainerInit() (*process, error) {
|
||||
s.pmu.Lock()
|
||||
p, ok := s.processes["init"]
|
||||
s.pmu.Unlock()
|
||||
if !ok {
|
||||
return nil, errInitProcessNotExist
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (s *Shim) startCommand(opts processOpts) (*process, error) {
|
||||
p, err := newProcess(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := s.m.Add(p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := opts.cmd.Start(); err != nil {
|
||||
close(p.done)
|
||||
if checkShimNotFound(err) {
|
||||
return nil, fmt.Errorf("%s not install on system", s.name)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
// make sure it does not die before we get the container's pid
|
||||
defer func() {
|
||||
go p.checkExited()
|
||||
}()
|
||||
if err := p.waitForCreate(s.timeout); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (s *Shim) command(args ...string) *exec.Cmd {
|
||||
return exec.Command(s.name, args...)
|
||||
}
|
||||
|
||||
func (s *Shim) startMonitor() {
|
||||
go s.m.Run()
|
||||
defer s.m.Close()
|
||||
for m := range s.m.Events() {
|
||||
p := m.(*process)
|
||||
close(p.done)
|
||||
}
|
||||
}
|
||||
|
||||
// checkShimNotFound inspects the error returned when starting the shim command
// and reports whether it means that the shim binary does not exist.
//
// Two shapes are recognised: an *exec.Error, produced when a bare command name
// cannot be resolved through PATH, and a "does not exist" error such as the
// *os.PathError produced when the binary is given as a path. The latter is
// detected with os.IsNotExist, which also matches wrapped path errors that a
// direct comparison with os.ErrNotExist misses.
func checkShimNotFound(err error) bool {
	if execErr, ok := err.(*exec.Error); ok {
		return execErr.Err == exec.ErrNotFound || os.IsNotExist(execErr.Err)
	}
	return os.IsNotExist(err)
}
|
||||
|
||||
// getFifoPath returns the path to the fifo on disk as long as the provided
|
||||
// interface is an *os.File and has a valid path on the Name() method call
|
||||
func getFifoPath(v interface{}) (string, error) {
|
||||
f, ok := v.(*os.File)
|
||||
if !ok {
|
||||
return "", ErrNotFifo
|
||||
}
|
||||
p := f.Name()
|
||||
if p == "" {
|
||||
return "", ErrNotFifo
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func getRootIDs(s *specs.Spec) (int, int, error) {
|
||||
if s == nil {
|
||||
return 0, 0, nil
|
||||
}
|
||||
var hasUserns bool
|
||||
for _, ns := range s.Linux.Namespaces {
|
||||
if ns.Type == specs.UserNamespace {
|
||||
hasUserns = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasUserns {
|
||||
return 0, 0, nil
|
||||
}
|
||||
uid := hostIDFromMap(0, s.Linux.UIDMappings)
|
||||
gid := hostIDFromMap(0, s.Linux.GIDMappings)
|
||||
return uid, gid, nil
|
||||
}
|
||||
|
||||
func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
|
||||
for _, m := range mp {
|
||||
if (id >= m.ContainerID) && (id <= (m.ContainerID + m.Size - 1)) {
|
||||
return int(m.HostID + (id - m.ContainerID))
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
Loading…
Reference in a new issue