Implement checkpoint / restore for shim

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2016-02-01 15:07:02 -08:00
parent 835f3b6a97
commit 277cc920a4
8 changed files with 258 additions and 93 deletions

View file

@ -149,22 +149,20 @@ func (s *apiServer) ListCheckpoint(ctx context.Context, r *types.ListCheckpointR
return nil, grpc.Errorf(codes.NotFound, "no such containers")
}
var out []*types.Checkpoint
/*
checkpoints, err := container.Checkpoints()
if err != nil {
return nil, err
}
for _, c := range checkpoints {
out = append(out, &types.Checkpoint{
Name: c.Name,
Tcp: c.Tcp,
Shell: c.Shell,
UnixSockets: c.UnixSockets,
// TODO: figure out timestamp
//Timestamp: c.Timestamp,
})
}
*/
checkpoints, err := container.Checkpoints()
if err != nil {
return nil, err
}
for _, c := range checkpoints {
out = append(out, &types.Checkpoint{
Name: c.Name,
Tcp: c.Tcp,
Shell: c.Shell,
UnixSockets: c.UnixSockets,
// TODO: figure out timestamp
//Timestamp: c.Timestamp,
})
}
return &types.ListCheckpointResponse{Checkpoints: out}, nil
}

View file

@ -11,13 +11,25 @@ import (
"github.com/docker/containerd/util"
)
var fexec bool
var (
fexec bool
fcheckpoint string
)
func init() {
flag.BoolVar(&fexec, "exec", false, "exec a process instead of starting the init")
flag.StringVar(&fcheckpoint, "checkpoint", "", "start container from an existing checkpoint")
flag.Parse()
}
func setupLogger() {
f, err := os.OpenFile("/tmp/shim.log", os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755)
if err != nil {
panic(err)
}
logrus.SetOutput(f)
}
// containerd-shim is a small shim that sits in front of a runc implementation
// that allows it to be repartented to init and handle reattach from the caller.
//
@ -38,7 +50,7 @@ func main() {
logrus.WithField("error", err).Fatal("shim: open exit pipe")
}
defer f.Close()
p, err := newProcess(flag.Arg(0), flag.Arg(1), fexec)
p, err := newProcess(flag.Arg(0), flag.Arg(1), fexec, fcheckpoint)
if err != nil {
logrus.WithField("error", err).Fatal("shim: create new process")
}

View file

@ -10,6 +10,7 @@ import (
"strconv"
"syscall"
"github.com/docker/containerd/runtime"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/specs"
)
@ -21,28 +22,59 @@ type process struct {
s specs.Process
exec bool
containerPid int
checkpoint *runtime.Checkpoint
}
func newProcess(id, bundle string, exec bool) (*process, error) {
f, err := os.Open("process.json")
if err != nil {
return nil, err
}
defer f.Close()
func newProcess(id, bundle string, exec bool, checkpoint string) (*process, error) {
p := &process{
id: id,
bundle: bundle,
exec: exec,
}
if err := json.NewDecoder(f).Decode(&p.s); err != nil {
s, err := loadProcess()
if err != nil {
return nil, err
}
p.s = *s
if checkpoint != "" {
cpt, err := loadCheckpoint(bundle, checkpoint)
if err != nil {
return nil, err
}
p.checkpoint = cpt
}
if err := p.openIO(); err != nil {
return nil, err
}
return p, nil
}
func loadProcess() (*specs.Process, error) {
f, err := os.Open("process.json")
if err != nil {
return nil, err
}
defer f.Close()
var s specs.Process
if err := json.NewDecoder(f).Decode(&s); err != nil {
return nil, err
}
return &s, nil
}
func loadCheckpoint(bundle, name string) (*runtime.Checkpoint, error) {
f, err := os.Open(filepath.Join(bundle, "checkpoints", name, "config.json"))
if err != nil {
return nil, err
}
defer f.Close()
var cpt runtime.Checkpoint
if err := json.NewDecoder(f).Decode(&cpt); err != nil {
return nil, err
}
return &cpt, nil
}
func (p *process) start() error {
cwd, err := os.Getwd()
if err != nil {
@ -53,17 +85,37 @@ func (p *process) start() error {
}
if p.exec {
args = append(args, "exec",
"--process", filepath.Join(cwd, "process.json"))
"--process", filepath.Join(cwd, "process.json"),
"--console", p.stdio.console,
)
} else if p.checkpoint != nil {
args = append(args, "restore",
"--image-path", filepath.Join(p.bundle, "checkpoints", p.checkpoint.Name),
)
add := func(flags ...string) {
args = append(args, flags...)
}
if p.checkpoint.Shell {
add("--shell-job")
}
if p.checkpoint.Tcp {
add("--tcp-established")
}
if p.checkpoint.UnixSockets {
add("--ext-unix-sk")
}
} else {
args = append(args, "start",
"--bundle", p.bundle)
"--bundle", p.bundle,
"--console", p.stdio.console,
)
}
args = append(args,
"-d",
"--console", p.stdio.console,
"--pid-file", filepath.Join(cwd, "pid"),
)
cmd := exec.Command("runc", args...)
cmd.Dir = p.bundle
cmd.Stdin = p.stdio.stdin
cmd.Stdout = p.stdio.stdout
cmd.Stderr = p.stdio.stderr
@ -114,9 +166,7 @@ func (p *process) openIO() error {
if err != nil {
return err
}
go func() {
io.Copy(console, stdin)
}()
go io.Copy(console, stdin)
stdout, err := os.OpenFile("stdout", syscall.O_RDWR, 0)
if err != nil {
return err
@ -127,21 +177,75 @@ func (p *process) openIO() error {
}()
return nil
}
i, err := p.initializeIO(int(p.s.User.UID))
if err != nil {
return err
}
// non-tty
for name, dest := range map[string]**os.File{
"stdin": &p.stdio.stdin,
"stdout": &p.stdio.stdout,
"stderr": &p.stdio.stderr,
for name, dest := range map[string]func(f *os.File){
"stdin": func(f *os.File) {
go io.Copy(i.Stdin, f)
},
"stdout": func(f *os.File) {
go io.Copy(f, i.Stdout)
},
"stderr": func(f *os.File) {
go io.Copy(f, i.Stderr)
},
} {
f, err := os.OpenFile(name, syscall.O_RDWR, 0)
if err != nil {
return err
}
*dest = f
dest(f)
}
return nil
}
type IO struct {
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
}
func (p *process) initializeIO(rootuid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
// cleanup in case of an error
defer func() {
if err != nil {
for _, fd := range fds {
syscall.Close(int(fd))
}
}
}()
// STDIN
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.stdio.stdin, i.Stdin = r, w
// STDOUT
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.stdio.stdout, i.Stdout = w, r
// STDERR
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.stdio.stderr, i.Stderr = w, r
// change ownership of the pipes incase we are in a user namespace
for _, fd := range fds {
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
return nil, err
}
}
return i, nil
}
func (p *process) Close() error {
return p.stdio.Close()
}

View file

@ -16,6 +16,7 @@ var checkpointCommand = cli.Command{
Subcommands: []cli.Command{
listCheckpointCommand,
createCheckpointCommand,
deleteCheckpointCommand,
},
Action: listCheckpoints,
}
@ -86,7 +87,11 @@ var createCheckpointCommand = cli.Command{
if _, err := c.CreateCheckpoint(netcontext.Background(), &types.CreateCheckpointRequest{
Id: containerID,
Checkpoint: &types.Checkpoint{
Name: name,
Name: name,
Exit: context.Bool("exit"),
Tcp: context.Bool("tcp"),
Shell: context.Bool("shell"),
UnixSockets: context.Bool("unix-sockets"),
},
}); err != nil {
fatal(err.Error(), 1)

View file

@ -7,6 +7,7 @@ import (
"os/exec"
"path/filepath"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/specs"
@ -18,7 +19,7 @@ type Container interface {
// Path returns the path to the bundle
Path() string
// Start starts the init process of the container
Start() (Process, error)
Start(checkpoint string) (Process, error)
// Exec starts another process in an existing container
Exec(string, specs.Process) (Process, error)
// Delete removes the container's state and any resources
@ -31,18 +32,16 @@ type Container interface {
Resume() error
// Pause pauses a running container
Pause() error
// RemoveProcess removes the specified process from the container
RemoveProcess(string) error
// Checkpoints returns all the checkpoints for a container
// Checkpoints() ([]Checkpoint, error)
Checkpoints() ([]Checkpoint, error)
// Checkpoint creates a new checkpoint
// Checkpoint(Checkpoint) error
Checkpoint(Checkpoint) error
// DeleteCheckpoint deletes the checkpoint for the provided name
// DeleteCheckpoint(name string) error
// Restore restores the container to that of the checkpoint provided by name
// Restore(name string) error
DeleteCheckpoint(name string) error
// Stats returns realtime container stats and resource information
// Stats() (*Stat, error)
// OOM signals the channel if the container received an OOM notification
// Stats() (*Stat, error) // OOM signals the channel if the container received an OOM notification
// OOM() (<-chan struct{}, error)
}
@ -138,12 +137,12 @@ func (c *container) Path() string {
return c.bundle
}
func (c *container) Start() (Process, error) {
func (c *container) Start(checkpoint string) (Process, error) {
processRoot := filepath.Join(c.root, c.id, InitProcessID)
if err := os.MkdirAll(processRoot, 0755); err != nil {
return nil, err
}
cmd := exec.Command("containerd-shim", c.id, c.bundle)
cmd := exec.Command("containerd-shim", "-checkpoint", checkpoint, c.id, c.bundle)
cmd.Dir = processRoot
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
@ -225,3 +224,72 @@ func (c *container) RemoveProcess(pid string) error {
delete(c.processes, pid)
return nil
}
func (c *container) Checkpoints() ([]Checkpoint, error) {
dirs, err := ioutil.ReadDir(filepath.Join(c.bundle, "checkpoints"))
if err != nil {
return nil, err
}
var out []Checkpoint
for _, d := range dirs {
if !d.IsDir() {
continue
}
path := filepath.Join(c.bundle, "checkpoints", d.Name(), "config.json")
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
var cpt Checkpoint
if err := json.Unmarshal(data, &cpt); err != nil {
return nil, err
}
out = append(out, cpt)
}
return out, nil
}
func (c *container) Checkpoint(cpt Checkpoint) error {
if err := os.MkdirAll(filepath.Join(c.bundle, "checkpoints"), 0755); err != nil {
return err
}
path := filepath.Join(c.bundle, "checkpoints", cpt.Name)
if err := os.Mkdir(path, 0755); err != nil {
return err
}
f, err := os.Create(filepath.Join(path, "config.json"))
if err != nil {
return err
}
cpt.Created = time.Now()
err = json.NewEncoder(f).Encode(cpt)
f.Close()
if err != nil {
return err
}
args := []string{
"--id", c.id,
"checkpoint",
"--image-path", path,
}
add := func(flags ...string) {
args = append(args, flags...)
}
if !cpt.Exit {
add("--leave-running")
}
if cpt.Shell {
add("--shell-job")
}
if cpt.Tcp {
add("--tcp-established")
}
if cpt.UnixSockets {
add("--ext-unix-sk")
}
return exec.Command("runc", args...).Run()
}
func (c *container) DeleteCheckpoint(name string) error {
return os.RemoveAll(filepath.Join(c.bundle, "checkpoints", name))
}

View file

@ -48,15 +48,15 @@ type Stat struct {
type Checkpoint struct {
// Timestamp is the time that checkpoint happened
Timestamp time.Time
Created time.Time `json:"created"`
// Name is the name of the checkpoint
Name string
Name string `json:"name"`
// Tcp checkpoints open tcp connections
Tcp bool
Tcp bool `json:"tcp"`
// UnixSockets persists unix sockets in the checkpoint
UnixSockets bool
UnixSockets bool `json:"unixSockets"`
// Shell persists tty sessions in the checkpoint
Shell bool
Shell bool `json:"shell"`
// Exit exits the container after the checkpoint is finished
Exit bool
Exit bool `json:"exit"`
}

View file

@ -5,14 +5,11 @@ type CreateCheckpointEvent struct {
}
func (h *CreateCheckpointEvent) Handle(e *Event) error {
/*
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
*/
return nil
// return i.container.Checkpoint(*e.Checkpoint)
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
return i.container.Checkpoint(*e.Checkpoint)
}
type DeleteCheckpointEvent struct {
@ -20,12 +17,9 @@ type DeleteCheckpointEvent struct {
}
func (h *DeleteCheckpointEvent) Handle(e *Event) error {
/*
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
*/
return nil
// return i.container.DeleteCheckpoint(e.Checkpoint.Name)
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
return i.container.DeleteCheckpoint(e.Checkpoint.Name)
}

View file

@ -37,30 +37,14 @@ type worker struct {
func (w *worker) Start() {
defer w.wg.Done()
for t := range w.s.tasks {
var (
err error
process runtime.Process
started = time.Now()
)
if t.Checkpoint != "" {
/*
if err := t.Container.Restore(t.Checkpoint); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
*/
} else {
process, err = t.Container.Start()
if err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
started := time.Now()
process, err := t.Container.Start(t.Checkpoint)
if err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
/*
if w.s.notifier != nil {