ca7c504068
Prior to this patch, when list containers by "ctr containers" or "ctr containers xxx", it will not get the proper status of conatinser(s). That was caused by the wrong implementation of State() for structure process, it only send a signal "0" to ping the "init" process and do nothing. Since the OCI/runc has implemented an interface Status(), we can use that. And I think this is more compatible with the design for containerd: - containerd -> runtime -> fun() Signed-off-by: Hu Keping <hukeping@huawei.com>
500 lines
10 KiB
Go
500 lines
10 KiB
Go
package runtime
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/docker/containerd/specs"
|
|
"github.com/opencontainers/runc/libcontainer"
|
|
ocs "github.com/opencontainers/specs/specs-go"
|
|
)
|
|
|
|
func getRootIDs(s *specs.Spec) (int, int, error) {
|
|
if s == nil {
|
|
return 0, 0, nil
|
|
}
|
|
var hasUserns bool
|
|
for _, ns := range s.Linux.Namespaces {
|
|
if ns.Type == ocs.UserNamespace {
|
|
hasUserns = true
|
|
break
|
|
}
|
|
}
|
|
if !hasUserns {
|
|
return 0, 0, nil
|
|
}
|
|
uid := hostIDFromMap(0, s.Linux.UIDMappings)
|
|
gid := hostIDFromMap(0, s.Linux.GIDMappings)
|
|
return uid, gid, nil
|
|
}
|
|
|
|
func (c *container) State() State {
|
|
proc := c.processes["init"]
|
|
if proc == nil {
|
|
return Stopped
|
|
}
|
|
return proc.State()
|
|
}
|
|
|
|
func (c *container) Runtime() string {
|
|
return c.runtime
|
|
}
|
|
|
|
func (c *container) Pause() error {
|
|
args := c.runtimeArgs
|
|
args = append(args, "pause", c.id)
|
|
b, err := exec.Command(c.runtime, args...).CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf(string(b))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *container) Resume() error {
|
|
args := c.runtimeArgs
|
|
args = append(args, "resume", c.id)
|
|
b, err := exec.Command(c.runtime, args...).CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf(string(b))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *container) Checkpoints() ([]Checkpoint, error) {
|
|
dirs, err := ioutil.ReadDir(filepath.Join(c.bundle, "checkpoints"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var out []Checkpoint
|
|
for _, d := range dirs {
|
|
if !d.IsDir() {
|
|
continue
|
|
}
|
|
path := filepath.Join(c.bundle, "checkpoints", d.Name(), "config.json")
|
|
data, err := ioutil.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var cpt Checkpoint
|
|
if err := json.Unmarshal(data, &cpt); err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, cpt)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func (c *container) Checkpoint(cpt Checkpoint) error {
|
|
if err := os.MkdirAll(filepath.Join(c.bundle, "checkpoints"), 0755); err != nil {
|
|
return err
|
|
}
|
|
path := filepath.Join(c.bundle, "checkpoints", cpt.Name)
|
|
if err := os.Mkdir(path, 0755); err != nil {
|
|
return err
|
|
}
|
|
f, err := os.Create(filepath.Join(path, "config.json"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cpt.Created = time.Now()
|
|
err = json.NewEncoder(f).Encode(cpt)
|
|
f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
args := []string{
|
|
"checkpoint",
|
|
"--image-path", path,
|
|
}
|
|
add := func(flags ...string) {
|
|
args = append(args, flags...)
|
|
}
|
|
add(c.runtimeArgs...)
|
|
if !cpt.Exit {
|
|
add("--leave-running")
|
|
}
|
|
if cpt.Shell {
|
|
add("--shell-job")
|
|
}
|
|
if cpt.Tcp {
|
|
add("--tcp-established")
|
|
}
|
|
if cpt.UnixSockets {
|
|
add("--ext-unix-sk")
|
|
}
|
|
add(c.id)
|
|
out, err := exec.Command(c.runtime, args...).CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf("%s: %s", err.Error(), string(out))
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (c *container) DeleteCheckpoint(name string) error {
|
|
return os.RemoveAll(filepath.Join(c.bundle, "checkpoints", name))
|
|
}
|
|
|
|
func (c *container) Start(checkpoint string, s Stdio) (Process, error) {
|
|
processRoot := filepath.Join(c.root, c.id, InitProcessID)
|
|
if err := os.Mkdir(processRoot, 0755); err != nil {
|
|
return nil, err
|
|
}
|
|
cmd := exec.Command(c.shim,
|
|
c.id, c.bundle, c.runtime,
|
|
)
|
|
cmd.Dir = processRoot
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
Setpgid: true,
|
|
}
|
|
spec, err := c.readSpec()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
config := &processConfig{
|
|
checkpoint: checkpoint,
|
|
root: processRoot,
|
|
id: InitProcessID,
|
|
c: c,
|
|
stdio: s,
|
|
spec: spec,
|
|
processSpec: specs.ProcessSpec(spec.Process),
|
|
}
|
|
p, err := newProcess(config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := c.startCmd(InitProcessID, cmd, p); err != nil {
|
|
return nil, err
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
func (c *container) Exec(pid string, pspec specs.ProcessSpec, s Stdio) (pp Process, err error) {
|
|
processRoot := filepath.Join(c.root, c.id, pid)
|
|
if err := os.Mkdir(processRoot, 0755); err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
c.RemoveProcess(pid)
|
|
}
|
|
}()
|
|
cmd := exec.Command(c.shim,
|
|
c.id, c.bundle, c.runtime,
|
|
)
|
|
cmd.Dir = processRoot
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
Setpgid: true,
|
|
}
|
|
spec, err := c.readSpec()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
config := &processConfig{
|
|
exec: true,
|
|
id: pid,
|
|
root: processRoot,
|
|
c: c,
|
|
processSpec: pspec,
|
|
spec: spec,
|
|
stdio: s,
|
|
}
|
|
p, err := newProcess(config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := c.startCmd(pid, cmd, p); err != nil {
|
|
return nil, err
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
func (c *container) startCmd(pid string, cmd *exec.Cmd, p *process) error {
|
|
if err := cmd.Start(); err != nil {
|
|
if exErr, ok := err.(*exec.Error); ok {
|
|
if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist {
|
|
return fmt.Errorf("%s not installed on system", c.shim)
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
if err := c.waitForStart(p, cmd); err != nil {
|
|
return err
|
|
}
|
|
c.processes[pid] = p
|
|
return nil
|
|
}
|
|
|
|
func (c *container) getLibctContainer() (libcontainer.Container, error) {
|
|
runtimeRoot := "/run/runc"
|
|
|
|
// Check that the root wasn't changed
|
|
for _, opt := range c.runtimeArgs {
|
|
if strings.HasPrefix(opt, "--root=") {
|
|
runtimeRoot = strings.TrimPrefix(opt, "--root=")
|
|
break
|
|
}
|
|
}
|
|
|
|
f, err := libcontainer.New(runtimeRoot, libcontainer.Cgroupfs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return f.Load(c.id)
|
|
}
|
|
|
|
func hostIDFromMap(id uint32, mp []ocs.IDMapping) int {
|
|
for _, m := range mp {
|
|
if (id >= m.ContainerID) && (id <= (m.ContainerID + m.Size - 1)) {
|
|
return int(m.HostID + (id - m.ContainerID))
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (c *container) Pids() ([]int, error) {
|
|
container, err := c.getLibctContainer()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return container.Processes()
|
|
}
|
|
|
|
func (c *container) Stats() (*Stat, error) {
|
|
container, err := c.getLibctContainer()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
now := time.Now()
|
|
stats, err := container.Stats()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &Stat{
|
|
Timestamp: now,
|
|
Data: stats,
|
|
}, nil
|
|
}
|
|
|
|
// Status implements the runtime Container interface.
|
|
func (c *container) Status() (State, error) {
|
|
args := c.runtimeArgs
|
|
args = append(args, "state", c.id)
|
|
|
|
out, err := exec.Command(c.runtime, args...).CombinedOutput()
|
|
if err != nil {
|
|
return "", fmt.Errorf(string(out))
|
|
}
|
|
|
|
// We only require the runtime json output to have a top level Status field.
|
|
var s struct {
|
|
Status State `json:"status"`
|
|
}
|
|
if err := json.Unmarshal(out, &s); err != nil {
|
|
return "", err
|
|
}
|
|
return s.Status, nil
|
|
}
|
|
|
|
func (c *container) OOM() (OOM, error) {
|
|
container, err := c.getLibctContainer()
|
|
if err != nil {
|
|
if lerr, ok := err.(libcontainer.Error); ok {
|
|
// with oom registration sometimes the container can run, exit, and be destroyed
|
|
// faster than we can get the state back so we can just ignore this
|
|
if lerr.Code() == libcontainer.ContainerNotExists {
|
|
return nil, ErrContainerExited
|
|
}
|
|
}
|
|
return nil, err
|
|
}
|
|
state, err := container.State()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
memoryPath := state.CgroupPaths["memory"]
|
|
return c.getMemeoryEventFD(memoryPath)
|
|
}
|
|
|
|
func (c *container) getMemeoryEventFD(root string) (*oom, error) {
|
|
f, err := os.Open(filepath.Join(root, "memory.oom_control"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
fd, _, serr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
|
|
if serr != 0 {
|
|
f.Close()
|
|
return nil, serr
|
|
}
|
|
if err := c.writeEventFD(root, int(f.Fd()), int(fd)); err != nil {
|
|
syscall.Close(int(fd))
|
|
f.Close()
|
|
return nil, err
|
|
}
|
|
return &oom{
|
|
root: root,
|
|
id: c.id,
|
|
eventfd: int(fd),
|
|
control: f,
|
|
}, nil
|
|
}
|
|
|
|
func (c *container) writeEventFD(root string, cfd, efd int) error {
|
|
f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
_, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd))
|
|
return err
|
|
}
|
|
|
|
type waitArgs struct {
|
|
pid int
|
|
err error
|
|
}
|
|
|
|
func (c *container) waitForStart(p *process, cmd *exec.Cmd) error {
|
|
wc := make(chan error, 1)
|
|
go func() {
|
|
for {
|
|
if _, err := p.getPidFromFile(); err != nil {
|
|
if os.IsNotExist(err) || err == errInvalidPidInt {
|
|
alive, err := isAlive(cmd)
|
|
if err != nil {
|
|
wc <- err
|
|
return
|
|
}
|
|
if !alive {
|
|
// runc could have failed to run the container so lets get the error
|
|
// out of the logs or the shim could have encountered an error
|
|
messages, err := readLogMessages(filepath.Join(p.root, "shim-log.json"))
|
|
if err != nil {
|
|
wc <- err
|
|
return
|
|
}
|
|
for _, m := range messages {
|
|
if m.Level == "error" {
|
|
wc <- fmt.Errorf("shim error: %v", m.Msg)
|
|
return
|
|
}
|
|
}
|
|
// no errors reported back from shim, check for runc/runtime errors
|
|
messages, err = readLogMessages(filepath.Join(p.root, "log.json"))
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
err = ErrContainerNotStarted
|
|
}
|
|
wc <- err
|
|
return
|
|
}
|
|
for _, m := range messages {
|
|
if m.Level == "error" {
|
|
wc <- fmt.Errorf("oci runtime error: %v", m.Msg)
|
|
return
|
|
}
|
|
}
|
|
wc <- ErrContainerNotStarted
|
|
return
|
|
}
|
|
time.Sleep(15 * time.Millisecond)
|
|
continue
|
|
}
|
|
wc <- err
|
|
return
|
|
}
|
|
// the pid file was read successfully
|
|
wc <- nil
|
|
return
|
|
}
|
|
}()
|
|
select {
|
|
case err := <-wc:
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
case <-time.After(c.timeout):
|
|
cmd.Process.Kill()
|
|
cmd.Wait()
|
|
return ErrContainerStartTimeout
|
|
}
|
|
}
|
|
|
|
// isAlive checks if the shim that launched the container is still alive
|
|
func isAlive(cmd *exec.Cmd) (bool, error) {
|
|
if err := syscall.Kill(cmd.Process.Pid, 0); err != nil {
|
|
if err == syscall.ESRCH {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
type oom struct {
|
|
id string
|
|
root string
|
|
control *os.File
|
|
eventfd int
|
|
}
|
|
|
|
func (o *oom) ContainerID() string {
|
|
return o.id
|
|
}
|
|
|
|
func (o *oom) FD() int {
|
|
return o.eventfd
|
|
}
|
|
|
|
func (o *oom) Flush() {
|
|
buf := make([]byte, 8)
|
|
syscall.Read(o.eventfd, buf)
|
|
}
|
|
|
|
func (o *oom) Removed() bool {
|
|
_, err := os.Lstat(filepath.Join(o.root, "cgroup.event_control"))
|
|
return os.IsNotExist(err)
|
|
}
|
|
|
|
func (o *oom) Close() error {
|
|
err := syscall.Close(o.eventfd)
|
|
if cerr := o.control.Close(); err == nil {
|
|
err = cerr
|
|
}
|
|
return err
|
|
}
|
|
|
|
type message struct {
|
|
Level string `json:"level"`
|
|
Msg string `json:"msg"`
|
|
}
|
|
|
|
func readLogMessages(path string) ([]message, error) {
|
|
var out []message
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
dec := json.NewDecoder(f)
|
|
for {
|
|
var m message
|
|
if err := dec.Decode(&m); err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return nil, err
|
|
}
|
|
out = append(out, m)
|
|
}
|
|
return out, nil
|
|
}
|