containerd/runtime/container.go
Michael Crosby db789045bd Update runc in dockerfile for new commands
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2016-04-26 15:06:25 -07:00

768 lines
17 KiB
Go

package runtime
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/specs"
"github.com/opencontainers/runc/libcontainer"
ocs "github.com/opencontainers/runtime-spec/specs-go"
)
type Container interface {
// ID returns the container ID
ID() string
// Path returns the path to the bundle
Path() string
// Start starts the init process of the container
Start(checkpoint string, s Stdio) (Process, error)
// Exec starts another process in an existing container
Exec(string, specs.ProcessSpec, Stdio) (Process, error)
// Delete removes the container's state and any resources
Delete() error
// Processes returns all the containers processes that have been added
Processes() ([]Process, error)
// State returns the containers runtime state
State() State
// Resume resumes a paused container
Resume() error
// Pause pauses a running container
Pause() error
// RemoveProcess removes the specified process from the container
RemoveProcess(string) error
// Checkpoints returns all the checkpoints for a container
Checkpoints() ([]Checkpoint, error)
// Checkpoint creates a new checkpoint
Checkpoint(Checkpoint) error
// DeleteCheckpoint deletes the checkpoint for the provided name
DeleteCheckpoint(name string) error
// Labels are user provided labels for the container
Labels() []string
// Pids returns all pids inside the container
Pids() ([]int, error)
// Stats returns realtime container stats and resource information
Stats() (*Stat, error)
// Name or path of the OCI compliant runtime used to execute the container
Runtime() string
// OOM signals the channel if the container received an OOM notification
OOM() (OOM, error)
// UpdateResource updates the containers resources to new values
UpdateResources(*Resource) error
// Status return the current status of the container.
Status() (State, error)
}
type OOM interface {
io.Closer
FD() int
ContainerID() string
Flush()
Removed() bool
}
type Stdio struct {
Stdin string
Stdout string
Stderr string
}
func NewStdio(stdin, stdout, stderr string) Stdio {
for _, s := range []*string{
&stdin, &stdout, &stderr,
} {
if *s == "" {
*s = "/dev/null"
}
}
return Stdio{
Stdin: stdin,
Stdout: stdout,
Stderr: stderr,
}
}
type ContainerOpts struct {
Root string
ID string
Bundle string
Runtime string
RuntimeArgs []string
Shim string
Labels []string
NoPivotRoot bool
Timeout time.Duration
}
// New returns a new container
func New(opts ContainerOpts) (Container, error) {
c := &container{
root: opts.Root,
id: opts.ID,
bundle: opts.Bundle,
labels: opts.Labels,
processes: make(map[string]*process),
runtime: opts.Runtime,
runtimeArgs: opts.RuntimeArgs,
shim: opts.Shim,
noPivotRoot: opts.NoPivotRoot,
timeout: opts.Timeout,
}
if err := os.Mkdir(filepath.Join(c.root, c.id), 0755); err != nil {
return nil, err
}
f, err := os.Create(filepath.Join(c.root, c.id, StateFile))
if err != nil {
return nil, err
}
defer f.Close()
if err := json.NewEncoder(f).Encode(state{
Bundle: c.bundle,
Labels: c.labels,
Runtime: c.runtime,
RuntimeArgs: c.runtimeArgs,
NoPivotRoot: opts.NoPivotRoot,
}); err != nil {
return nil, err
}
return c, nil
}
func Load(root, id string) (Container, error) {
var s state
f, err := os.Open(filepath.Join(root, id, StateFile))
if err != nil {
return nil, err
}
defer f.Close()
if err := json.NewDecoder(f).Decode(&s); err != nil {
return nil, err
}
c := &container{
root: root,
id: id,
bundle: s.Bundle,
labels: s.Labels,
runtime: s.Runtime,
runtimeArgs: s.RuntimeArgs,
shim: s.Shim,
noPivotRoot: s.NoPivotRoot,
processes: make(map[string]*process),
}
dirs, err := ioutil.ReadDir(filepath.Join(root, id))
if err != nil {
return nil, err
}
for _, d := range dirs {
if !d.IsDir() {
continue
}
pid := d.Name()
s, err := readProcessState(filepath.Join(root, id, pid))
if err != nil {
return nil, err
}
p, err := loadProcess(filepath.Join(root, id, pid), pid, c, s)
if err != nil {
logrus.WithField("id", id).WithField("pid", pid).Debug("containerd: error loading process %s", err)
continue
}
c.processes[pid] = p
}
return c, nil
}
func readProcessState(dir string) (*ProcessState, error) {
f, err := os.Open(filepath.Join(dir, "process.json"))
if err != nil {
return nil, err
}
defer f.Close()
var s ProcessState
if err := json.NewDecoder(f).Decode(&s); err != nil {
return nil, err
}
return &s, nil
}
type container struct {
// path to store runtime state information
root string
id string
bundle string
runtime string
runtimeArgs []string
shim string
processes map[string]*process
labels []string
oomFds []int
noPivotRoot bool
timeout time.Duration
}
func (c *container) ID() string {
return c.id
}
func (c *container) Path() string {
return c.bundle
}
func (c *container) Labels() []string {
return c.labels
}
func (c *container) readSpec() (*specs.Spec, error) {
var spec specs.Spec
f, err := os.Open(filepath.Join(c.bundle, "config.json"))
if err != nil {
return nil, err
}
defer f.Close()
if err := json.NewDecoder(f).Decode(&spec); err != nil {
return nil, err
}
return &spec, nil
}
func (c *container) Delete() error {
err := os.RemoveAll(filepath.Join(c.root, c.id))
args := c.runtimeArgs
args = append(args, "delete", c.id)
if derr := exec.Command(c.runtime, args...).Run(); err == nil {
err = derr
}
return err
}
func (c *container) Processes() ([]Process, error) {
out := []Process{}
for _, p := range c.processes {
out = append(out, p)
}
return out, nil
}
func (c *container) RemoveProcess(pid string) error {
delete(c.processes, pid)
return os.RemoveAll(filepath.Join(c.root, c.id, pid))
}
func (c *container) UpdateResources(r *Resource) error {
container, err := c.getLibctContainer()
if err != nil {
return err
}
config := container.Config()
config.Cgroups.Resources.CpuShares = r.CPUShares
config.Cgroups.Resources.BlkioWeight = r.BlkioWeight
config.Cgroups.Resources.CpuPeriod = r.CPUPeriod
config.Cgroups.Resources.CpuQuota = r.CPUQuota
config.Cgroups.Resources.CpusetCpus = r.CpusetCpus
config.Cgroups.Resources.CpusetMems = r.CpusetMems
config.Cgroups.Resources.KernelMemory = r.KernelMemory
config.Cgroups.Resources.Memory = r.Memory
config.Cgroups.Resources.MemoryReservation = r.MemoryReservation
config.Cgroups.Resources.MemorySwap = r.MemorySwap
return container.Set(config)
}
func getRootIDs(s *specs.Spec) (int, int, error) {
if s == nil {
return 0, 0, nil
}
var hasUserns bool
for _, ns := range s.Linux.Namespaces {
if ns.Type == ocs.UserNamespace {
hasUserns = true
break
}
}
if !hasUserns {
return 0, 0, nil
}
uid := hostIDFromMap(0, s.Linux.UIDMappings)
gid := hostIDFromMap(0, s.Linux.GIDMappings)
return uid, gid, nil
}
func (c *container) State() State {
proc := c.processes["init"]
if proc == nil {
return Stopped
}
return proc.State()
}
func (c *container) Runtime() string {
return c.runtime
}
func (c *container) Pause() error {
args := c.runtimeArgs
args = append(args, "pause", c.id)
b, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return fmt.Errorf(string(b))
}
return nil
}
func (c *container) Resume() error {
args := c.runtimeArgs
args = append(args, "resume", c.id)
b, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return fmt.Errorf(string(b))
}
return nil
}
func (c *container) Checkpoints() ([]Checkpoint, error) {
dirs, err := ioutil.ReadDir(filepath.Join(c.bundle, "checkpoints"))
if err != nil {
return nil, err
}
var out []Checkpoint
for _, d := range dirs {
if !d.IsDir() {
continue
}
path := filepath.Join(c.bundle, "checkpoints", d.Name(), "config.json")
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
var cpt Checkpoint
if err := json.Unmarshal(data, &cpt); err != nil {
return nil, err
}
out = append(out, cpt)
}
return out, nil
}
func (c *container) Checkpoint(cpt Checkpoint) error {
if err := os.MkdirAll(filepath.Join(c.bundle, "checkpoints"), 0755); err != nil {
return err
}
path := filepath.Join(c.bundle, "checkpoints", cpt.Name)
if err := os.Mkdir(path, 0755); err != nil {
return err
}
f, err := os.Create(filepath.Join(path, "config.json"))
if err != nil {
return err
}
cpt.Created = time.Now()
err = json.NewEncoder(f).Encode(cpt)
f.Close()
if err != nil {
return err
}
args := []string{
"checkpoint",
"--image-path", path,
}
add := func(flags ...string) {
args = append(args, flags...)
}
add(c.runtimeArgs...)
if !cpt.Exit {
add("--leave-running")
}
if cpt.Shell {
add("--shell-job")
}
if cpt.Tcp {
add("--tcp-established")
}
if cpt.UnixSockets {
add("--ext-unix-sk")
}
add(c.id)
out, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return fmt.Errorf("%s: %s", err.Error(), string(out))
}
return err
}
func (c *container) DeleteCheckpoint(name string) error {
return os.RemoveAll(filepath.Join(c.bundle, "checkpoints", name))
}
func (c *container) Start(checkpoint string, s Stdio) (Process, error) {
processRoot := filepath.Join(c.root, c.id, InitProcessID)
if err := os.Mkdir(processRoot, 0755); err != nil {
return nil, err
}
cmd := exec.Command(c.shim,
c.id, c.bundle, c.runtime,
)
cmd.Dir = processRoot
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
spec, err := c.readSpec()
if err != nil {
return nil, err
}
config := &processConfig{
checkpoint: checkpoint,
root: processRoot,
id: InitProcessID,
c: c,
stdio: s,
spec: spec,
processSpec: specs.ProcessSpec(spec.Process),
}
p, err := newProcess(config)
if err != nil {
return nil, err
}
if err := c.startCmd(InitProcessID, cmd, p); err != nil {
return nil, err
}
return p, nil
}
func (c *container) Exec(pid string, pspec specs.ProcessSpec, s Stdio) (pp Process, err error) {
processRoot := filepath.Join(c.root, c.id, pid)
if err := os.Mkdir(processRoot, 0755); err != nil {
return nil, err
}
defer func() {
if err != nil {
c.RemoveProcess(pid)
}
}()
cmd := exec.Command(c.shim,
c.id, c.bundle, c.runtime,
)
cmd.Dir = processRoot
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
spec, err := c.readSpec()
if err != nil {
return nil, err
}
config := &processConfig{
exec: true,
id: pid,
root: processRoot,
c: c,
processSpec: pspec,
spec: spec,
stdio: s,
}
p, err := newProcess(config)
if err != nil {
return nil, err
}
if err := c.startCmd(pid, cmd, p); err != nil {
return nil, err
}
return p, nil
}
func (c *container) startCmd(pid string, cmd *exec.Cmd, p *process) error {
if err := cmd.Start(); err != nil {
if exErr, ok := err.(*exec.Error); ok {
if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist {
return fmt.Errorf("%s not installed on system", c.shim)
}
}
return err
}
if err := c.waitForStart(p, cmd); err != nil {
return err
}
c.processes[pid] = p
return nil
}
func (c *container) getLibctContainer() (libcontainer.Container, error) {
runtimeRoot := "/run/runc"
// Check that the root wasn't changed
for _, opt := range c.runtimeArgs {
if strings.HasPrefix(opt, "--root=") {
runtimeRoot = strings.TrimPrefix(opt, "--root=")
break
}
}
f, err := libcontainer.New(runtimeRoot, libcontainer.Cgroupfs)
if err != nil {
return nil, err
}
return f.Load(c.id)
}
func hostIDFromMap(id uint32, mp []ocs.IDMapping) int {
for _, m := range mp {
if (id >= m.ContainerID) && (id <= (m.ContainerID + m.Size - 1)) {
return int(m.HostID + (id - m.ContainerID))
}
}
return 0
}
func (c *container) Pids() ([]int, error) {
args := c.runtimeArgs
args = append(args, "ps", "--format=json", c.id)
out, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return nil, fmt.Errorf("%s", out)
}
var pids []int
if err := json.Unmarshal(out, &pids); err != nil {
return nil, err
}
return pids, nil
}
func (c *container) Stats() (*Stat, error) {
now := time.Now()
args := c.runtimeArgs
args = append(args, "events", "--stats", c.id)
out, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return nil, fmt.Errorf("%s", out)
}
s := struct {
Data *Stat `json:"data"`
}{}
if err := json.Unmarshal(out, &s); err != nil {
return nil, err
}
s.Data.Timestamp = now
return s.Data, nil
}
func (c *container) OOM() (OOM, error) {
container, err := c.getLibctContainer()
if err != nil {
if lerr, ok := err.(libcontainer.Error); ok {
// with oom registration sometimes the container can run, exit, and be destroyed
// faster than we can get the state back so we can just ignore this
if lerr.Code() == libcontainer.ContainerNotExists {
return nil, ErrContainerExited
}
}
return nil, err
}
state, err := container.State()
if err != nil {
return nil, err
}
memoryPath := state.CgroupPaths["memory"]
return c.getMemeoryEventFD(memoryPath)
}
// Status implements the runtime Container interface.
func (c *container) Status() (State, error) {
args := c.runtimeArgs
args = append(args, "state", c.id)
out, err := exec.Command(c.runtime, args...).CombinedOutput()
if err != nil {
return "", fmt.Errorf("%s", out)
}
// We only require the runtime json output to have a top level Status field.
var s struct {
Status State `json:"status"`
}
if err := json.Unmarshal(out, &s); err != nil {
return "", err
}
return s.Status, nil
}
func (c *container) getMemeoryEventFD(root string) (*oom, error) {
f, err := os.Open(filepath.Join(root, "memory.oom_control"))
if err != nil {
return nil, err
}
fd, _, serr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if serr != 0 {
f.Close()
return nil, serr
}
if err := c.writeEventFD(root, int(f.Fd()), int(fd)); err != nil {
syscall.Close(int(fd))
f.Close()
return nil, err
}
return &oom{
root: root,
id: c.id,
eventfd: int(fd),
control: f,
}, nil
}
func (c *container) writeEventFD(root string, cfd, efd int) error {
f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0)
if err != nil {
return err
}
defer f.Close()
_, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd))
return err
}
type waitArgs struct {
pid int
err error
}
func (c *container) waitForStart(p *process, cmd *exec.Cmd) error {
wc := make(chan error, 1)
go func() {
for {
if _, err := p.getPidFromFile(); err != nil {
if os.IsNotExist(err) || err == errInvalidPidInt {
alive, err := isAlive(cmd)
if err != nil {
wc <- err
return
}
if !alive {
// runc could have failed to run the container so lets get the error
// out of the logs or the shim could have encountered an error
messages, err := readLogMessages(filepath.Join(p.root, "shim-log.json"))
if err != nil {
wc <- err
return
}
for _, m := range messages {
if m.Level == "error" {
wc <- fmt.Errorf("shim error: %v", m.Msg)
return
}
}
// no errors reported back from shim, check for runc/runtime errors
messages, err = readLogMessages(filepath.Join(p.root, "log.json"))
if err != nil {
if os.IsNotExist(err) {
err = ErrContainerNotStarted
}
wc <- err
return
}
for _, m := range messages {
if m.Level == "error" {
wc <- fmt.Errorf("oci runtime error: %v", m.Msg)
return
}
}
wc <- ErrContainerNotStarted
return
}
time.Sleep(15 * time.Millisecond)
continue
}
wc <- err
return
}
// the pid file was read successfully
wc <- nil
return
}
}()
select {
case err := <-wc:
if err != nil {
return err
}
return nil
case <-time.After(c.timeout):
cmd.Process.Kill()
cmd.Wait()
return ErrContainerStartTimeout
}
}
// isAlive checks if the shim that launched the container is still alive
func isAlive(cmd *exec.Cmd) (bool, error) {
if err := syscall.Kill(cmd.Process.Pid, 0); err != nil {
if err == syscall.ESRCH {
return false, nil
}
return false, err
}
return true, nil
}
type oom struct {
id string
root string
control *os.File
eventfd int
}
func (o *oom) ContainerID() string {
return o.id
}
func (o *oom) FD() int {
return o.eventfd
}
func (o *oom) Flush() {
buf := make([]byte, 8)
syscall.Read(o.eventfd, buf)
}
func (o *oom) Removed() bool {
_, err := os.Lstat(filepath.Join(o.root, "cgroup.event_control"))
return os.IsNotExist(err)
}
func (o *oom) Close() error {
err := syscall.Close(o.eventfd)
if cerr := o.control.Close(); err == nil {
err = cerr
}
return err
}
type message struct {
Level string `json:"level"`
Msg string `json:"msg"`
}
func readLogMessages(path string) ([]message, error) {
var out []message
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
dec := json.NewDecoder(f)
for {
var m message
if err := dec.Decode(&m); err != nil {
if err == io.EOF {
break
}
return nil, err
}
out = append(out, m)
}
return out, nil
}