Add basic checkpoint and restore support

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2015-12-03 16:07:53 -08:00
parent c1eb9ac90b
commit ae9b2bafd5
9 changed files with 231 additions and 29 deletions

View File

@ -19,17 +19,25 @@ func NewServer(supervisor *containerd.Supervisor) http.Handler {
supervisor: supervisor,
r: r,
}
// TODO: add container stats
// TODO: add container checkpoint
// TODO: add container restore
// TODO: set prctl child subreaper
// process handlers
r.HandleFunc("/containers/{id:.*}/process/{pid:.*}", s.signalPid).Methods("POST")
r.HandleFunc("/containers/{id:.*}/process", s.addProcess).Methods("PUT")
// checkpoint and restore handlers
// TODO: PUT handler for adding a checkpoint to containerd??
r.HandleFunc("/containers/{id:.*}/checkpoint/{name:.*}", s.createCheckpoint).Methods("POST")
// r.HandleFunc("/containers/{id:.*}/checkpoint/{cid:.*}", s.deleteCheckpoint).Methods("DELETE")
r.HandleFunc("/containers/{id:.*}/checkpoint", s.listCheckpoints).Methods("GET")
// container handlers
r.HandleFunc("/containers/{id:.*}", s.createContainer).Methods("POST")
r.HandleFunc("/containers/{id:.*}", s.updateContainer).Methods("PATCH")
// internal method for replaying the journal
r.HandleFunc("/event", s.event).Methods("POST")
r.HandleFunc("/events", s.events).Methods("GET")
// containerd handlers
r.HandleFunc("/state", s.state).Methods("GET")
return s
}
@ -252,6 +260,12 @@ func (s *server) createContainer(w http.ResponseWriter, r *http.Request) {
e := containerd.NewEvent(containerd.StartContainerEventType)
e.ID = id
e.BundlePath = c.BundlePath
if c.Checkpoint != nil {
e.Checkpoint = &runtime.Checkpoint{
Name: c.Checkpoint.Name,
Path: c.Checkpoint.Path,
}
}
e.Stdio = &runtime.Stdio{
Stderr: c.Stderr,
Stdout: c.Stdout,
@ -267,3 +281,71 @@ func (s *server) createContainer(w http.ResponseWriter, r *http.Request) {
}
w.WriteHeader(http.StatusCreated)
}
// listCheckpoints handles GET /containers/{id}/checkpoint.
//
// It sends a GetContainerEventType event to the supervisor, picks the
// container whose ID equals the "id" route variable, and writes that
// container's checkpoints to the response as a JSON array.
//
// Responses:
//   - 200 with a JSON array (an empty array when there are no checkpoints)
//   - 404 when no container has the requested ID
//   - 500 when the supervisor event, the checkpoint lookup, or the JSON
//     encoding fails
func (s *server) listCheckpoints(w http.ResponseWriter, r *http.Request) {
	id := mux.Vars(r)["id"]
	e := containerd.NewEvent(containerd.GetContainerEventType)
	s.supervisor.SendEvent(e)
	if err := <-e.Err; err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	var container runtime.Container
	for _, c := range e.Containers {
		if c.ID() == id {
			container = c
			break
		}
	}
	if container == nil {
		http.Error(w, "container not found", http.StatusNotFound)
		return
	}
	checkpoints, err := container.Checkpoints()
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// Keep the slice non-nil so "no checkpoints" is encoded as [] and not null.
	out := make([]Checkpoint, 0, len(checkpoints))
	for _, c := range checkpoints {
		out = append(out, Checkpoint{
			Path:        c.Path,
			Name:        c.Name,
			Tcp:         c.Tcp,
			Shell:       c.Shell,
			UnixSockets: c.UnixSockets,
		})
	}
	// Previously the collected checkpoints were dropped and the client got an
	// empty 200 response; actually send them.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(out); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}
// createCheckpoint handles POST /containers/{id}/checkpoint/{name}.
//
// The request body is decoded as a Checkpoint; a body that fails to decode
// yields 400. The checkpoint name always comes from the "name" route
// variable (any name in the body is not used), while the path and the
// running/tcp/unixSockets/shell flags come from the body. The resulting
// CreateCheckpointEventType event is sent to the supervisor: an error from
// the event yields 500, success yields 201.
func (s *server) createCheckpoint(w http.ResponseWriter, r *http.Request) {
	routeVars := mux.Vars(r)

	var body Checkpoint
	decodeErr := json.NewDecoder(r.Body).Decode(&body)
	if decodeErr != nil {
		http.Error(w, decodeErr.Error(), http.StatusBadRequest)
		return
	}

	evt := containerd.NewEvent(containerd.CreateCheckpointEventType)
	evt.ID = routeVars["id"]
	evt.Checkpoint = &runtime.Checkpoint{
		Name:        routeVars["name"],
		Path:        body.Path,
		Running:     body.Running,
		Tcp:         body.Tcp,
		UnixSockets: body.UnixSockets,
		Shell:       body.Shell,
	}

	s.supervisor.SendEvent(evt)
	// The supervisor reports the outcome of the event on evt.Err.
	if eventErr := <-evt.Err; eventErr != nil {
		http.Error(w, eventErr.Error(), http.StatusInternalServerError)
		return
	}
	w.WriteHeader(http.StatusCreated)
}
// deleteCheckpoint is an unimplemented placeholder: its body is empty, so it
// never writes to the response or touches any checkpoint. The DELETE route
// that would dispatch to it is commented out in NewServer.
func (s *server) deleteCheckpoint(w http.ResponseWriter, r *http.Request) {
}

View File

@ -28,6 +28,7 @@ type Container struct {
Stdout string `json:"stdout,omitempty"`
Stderr string `json:"stderr,omitempty"`
State *ContainerState `json:"state,omitempty"`
Checkpoint *Checkpoint `json:"checkpoint,omitempty"`
}
type User struct {
@ -54,3 +55,12 @@ type Event struct {
ID string `json:"id,omitempty"`
Status int `json:"status,omitempty"`
}
// Checkpoint is the JSON shape of a checkpoint in the HTTP API. It is decoded
// from the body of a create-checkpoint request, returned by the checkpoint
// listing, and may be attached to a Container to request a restore.
type Checkpoint struct {
// Name identifies the checkpoint. createCheckpoint takes the name from the
// URL instead of this field.
Name string `json:"name,omitempty"`
// Path is the directory holding the checkpoint images. An empty path is
// replaced with a per-container default by the libcontainer runtime.
Path string `json:"path"`
// Running is forwarded to the runtime checkpoint; the libcontainer runtime
// maps it to CriuOpts.LeaveRunning.
Running bool `json:"running,omitempty"`
// Tcp is forwarded to the runtime checkpoint; the libcontainer runtime maps
// it to CriuOpts.TcpEstablished.
Tcp bool `json:"tcp"`
// UnixSockets is forwarded to the runtime checkpoint; the libcontainer
// runtime maps it to CriuOpts.ExternalUnixConnections.
UnixSockets bool `json:"unixSockets"`
// Shell is forwarded to the runtime checkpoint; the libcontainer runtime
// maps it to CriuOpts.ShellJob.
Shell bool `json:"shell"`
}

13
checkpoint.go Normal file
View File

@ -0,0 +1,13 @@
package containerd
// CreateCheckpointEvent is the handler registered for
// CreateCheckpointEventType; it checkpoints a container known to the
// supervisor.
type CreateCheckpointEvent struct {
// s is the supervisor whose containers map Handle looks the container up in.
s *Supervisor
}
// Handle checkpoints the container identified by e.ID using the options in
// e.Checkpoint and returns the runtime's result. It returns
// ErrContainerNotFound when the supervisor does not know the ID.
//
// e.Checkpoint is dereferenced without a nil check once the container has
// been found, so callers must set it on the event.
func (h *CreateCheckpointEvent) Handle(e *Event) error {
	if target, found := h.s.containers[e.ID]; found {
		return target.Checkpoint(*e.Checkpoint)
	}
	return ErrContainerNotFound
}

View File

@ -11,14 +11,15 @@ import (
// EventType names the kind of an Event; NewEvent takes one when building an
// event.
type EventType string
// Event types known to the supervisor. NewSupervisor registers a handler for
// each of them in its handlers map.
const (
ExecExitEventType EventType = "execExit"
ExitEventType EventType = "exit"
StartContainerEventType EventType = "startContainer"
DeleteEventType EventType = "deleteContainerEvent"
GetContainerEventType EventType = "getContainer"
SignalEventType EventType = "signal"
AddProcessEventType EventType = "addProcess"
UpdateContainerEventType EventType = "updateContainer"
ExecExitEventType EventType = "execExit"
ExitEventType EventType = "exit"
StartContainerEventType EventType = "startContainer"
DeleteEventType EventType = "deleteContainerEvent"
GetContainerEventType EventType = "getContainer"
SignalEventType EventType = "signal"
AddProcessEventType EventType = "addProcess"
UpdateContainerEventType EventType = "updateContainer"
// CreateCheckpointEventType requests a checkpoint of an existing container;
// it is handled by CreateCheckpointEvent.
CreateCheckpointEventType EventType = "createCheckpoint"
)
func NewEvent(t EventType) *Event {
@ -41,6 +42,7 @@ type Event struct {
Process *specs.Process `json:"process,omitempty"`
State *runtime.State `json:"state,omitempty"`
Containers []runtime.Container `json:"-"`
Checkpoint *runtime.Checkpoint `json:"checkpoint,omitempty"`
Err chan error `json:"-"`
}

View File

@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/docker/containerd/runtime"
"github.com/opencontainers/runc/libcontainer"
@ -190,6 +191,57 @@ type libcontainerContainer struct {
exitStatus int
exited bool
path string
checkpoints map[string]runtime.Checkpoint
}
// Checkpoints returns a copy of every checkpoint recorded for this container.
// The result is never nil, and because it is built by ranging over a map its
// order is unspecified. The returned error is always nil.
func (c *libcontainerContainer) Checkpoints() ([]runtime.Checkpoint, error) {
	list := make([]runtime.Checkpoint, 0, len(c.checkpoints))
	for name := range c.checkpoints {
		list = append(list, c.checkpoints[name])
	}
	return list, nil
}
// Checkpoint checkpoints the container with the options described by cp and,
// on success, records cp under cp.Name with Timestamp set to the current
// time.
//
// The images directory is created (mode 0755) before the underlying
// libcontainer checkpoint runs. createCheckpointOpts fills in cp.Path when it
// is empty, so the recorded entry always carries the directory actually used.
// Any error from creating the directory or from libcontainer is returned
// as-is and nothing is recorded.
func (c *libcontainerContainer) Checkpoint(cp runtime.Checkpoint) error {
	criuOpts := c.createCheckpointOpts(&cp)

	err := os.MkdirAll(criuOpts.ImagesDirectory, 0755)
	if err != nil {
		return err
	}
	if err = c.c.Checkpoint(criuOpts); err != nil {
		return err
	}

	cp.Timestamp = time.Now()
	c.checkpoints[cp.Name] = cp
	return nil
}
// createCheckpointOpts translates cp into libcontainer CRIU options.
//
// Side effect: when cp.Path is empty it is set to
// <container path>/checkpoints/<cp.Name>, and that value is also used as the
// images directory.
func (c *libcontainerContainer) createCheckpointOpts(cp *runtime.Checkpoint) *libcontainer.CriuOpts {
	// Resolve the default directory first so the literal below can use it.
	if cp.Path == "" {
		cp.Path = filepath.Join(c.path, "checkpoints", cp.Name)
	}
	return &libcontainer.CriuOpts{
		ImagesDirectory:         cp.Path,
		LeaveRunning:            cp.Running,
		ShellJob:                cp.Shell,
		TcpEstablished:          cp.Tcp,
		ExternalUnixConnections: cp.UnixSockets,
	}
}
// Restore starts the container's init process from a checkpoint.
//
// When name matches a checkpoint recorded by Checkpoint, the options are
// rebuilt from that record and the path argument is not consulted. Otherwise
// only the images directory is set: path if non-empty, else
// <container path>/checkpoints/<name>. The error from libcontainer's Restore
// is returned unchanged.
func (c *libcontainerContainer) Restore(path, name string) error {
	var criuOpts libcontainer.CriuOpts
	if known, ok := c.checkpoints[name]; ok {
		criuOpts = *c.createCheckpointOpts(&known)
	} else {
		if path == "" {
			path = filepath.Join(c.path, "checkpoints", name)
		}
		criuOpts.ImagesDirectory = path
	}
	return c.c.Restore(c.initProcess.process, &criuOpts)
}
func (c *libcontainerContainer) Resume() error {
@ -300,6 +352,7 @@ func (r *libcontainerRuntime) Create(id, bundlePath string, stdio *runtime.Stdio
c := &libcontainerContainer{
c: container,
additionalProcesses: make(map[int]*libcontainerProcess),
checkpoints: make(map[string]runtime.Checkpoint),
initProcess: &libcontainerProcess{
process: process,
spec: spec.Process,

View File

@ -2,6 +2,7 @@ package runtime
import (
"os"
"time"
"github.com/opencontainers/specs"
)
@ -11,6 +12,7 @@ type Process interface {
Spec() specs.Process
Signal(os.Signal) error
}
// Status is a string-backed type.
// NOTE(review): presumably the const block that follows enumerates its
// values — confirm against the full file.
type Status string
const (
@ -27,6 +29,16 @@ type Stdio struct {
Stdout string `json:"stdout,omitempty"`
}
// Checkpoint describes a container checkpoint: where its images live and the
// options it is taken with.
type Checkpoint struct {
// Timestamp is set by the libcontainer runtime to the current time after a
// successful checkpoint. Note that encoding/json's omitempty never omits
// struct values, so a zero Timestamp is still encoded.
Timestamp time.Time `json:"timestamp,omitempty"`
// Path is the directory holding the checkpoint images. When empty, the
// libcontainer runtime defaults it to <container path>/checkpoints/<Name>.
Path string `json:"path,omitempty"`
// Name identifies the checkpoint; the libcontainer runtime uses it as the
// key under which the checkpoint is recorded.
Name string `json:"name,omitempty"`
// Tcp is mapped to CriuOpts.TcpEstablished by the libcontainer runtime.
Tcp bool `json:"tcp"`
// UnixSockets is mapped to CriuOpts.ExternalUnixConnections by the
// libcontainer runtime.
UnixSockets bool `json:"unixSockets"`
// Shell is mapped to CriuOpts.ShellJob by the libcontainer runtime.
Shell bool `json:"shell"`
// Running is mapped to CriuOpts.LeaveRunning by the libcontainer runtime.
Running bool `json:"running,omitempty"`
}
type Container interface {
// ID returns the container ID
ID() string
@ -50,4 +62,10 @@ type Container interface {
Resume() error
// Pause pauses a running container
Pause() error
Checkpoints() ([]Checkpoint, error)
Checkpoint(Checkpoint) error
Restore(path, name string) error
}

View File

@ -11,9 +11,16 @@ func (h *StartEvent) Handle(e *Event) error {
}
h.s.containers[e.ID] = container
ContainersCounter.Inc(1)
h.s.tasks <- &StartTask{
task := &StartTask{
Err: e.Err,
Container: container,
}
if e.Checkpoint != nil {
task.Checkpoint = &Checkpoint{
Name: e.Checkpoint.Name,
Path: e.Checkpoint.Path,
}
}
h.s.tasks <- task
return errDeferedResponse
}

View File

@ -40,14 +40,15 @@ func NewSupervisor(stateDir string, tasks chan *StartTask) (*Supervisor, error)
}
// register default event handlers
s.handlers = map[EventType]Handler{
ExecExitEventType: &ExecExitEvent{s},
ExitEventType: &ExitEvent{s},
StartContainerEventType: &StartEvent{s},
DeleteEventType: &DeleteEvent{s},
GetContainerEventType: &GetContainersEvent{s},
SignalEventType: &SignalEvent{s},
AddProcessEventType: &AddProcessEvent{s},
UpdateContainerEventType: &UpdateEvent{s},
ExecExitEventType: &ExecExitEvent{s},
ExitEventType: &ExitEvent{s},
StartContainerEventType: &StartEvent{s},
DeleteEventType: &DeleteEvent{s},
GetContainerEventType: &GetContainersEvent{s},
SignalEventType: &SignalEvent{s},
AddProcessEventType: &AddProcessEvent{s},
UpdateContainerEventType: &UpdateEvent{s},
CreateCheckpointEventType: &CreateCheckpointEvent{s},
}
// start the container workers for concurrent container starts
return s, nil

View File

@ -11,9 +11,15 @@ type Worker interface {
Start()
}
// Checkpoint names the checkpoint a StartTask should restore from.
// StartEvent.Handle copies both fields from the event's checkpoint, and the
// worker passes them to Container.Restore.
type Checkpoint struct {
// Path is passed as the path argument of Container.Restore.
Path string
// Name is passed as the name argument of Container.Restore.
Name string
}
// StartTask is a unit of work sent on the supervisor's tasks channel and
// consumed by a worker. The worker restores Container from Checkpoint when
// Checkpoint is non-nil and starts it normally otherwise, then reports the
// outcome (nil on success) on Err.
type StartTask struct {
Container runtime.Container
Err chan error
Container runtime.Container
Checkpoint *Checkpoint
Err chan error
}
func NewWorker(s *Supervisor, wg *sync.WaitGroup) Worker {
@ -32,12 +38,22 @@ func (w *worker) Start() {
defer w.wg.Done()
for t := range w.s.tasks {
started := time.Now()
if err := t.Container.Start(); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
if t.Checkpoint != nil {
if err := t.Container.Restore(t.Checkpoint.Path, t.Checkpoint.Name); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
} else {
if err := t.Container.Start(); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
}
ContainerStartTimer.UpdateSince(started)
t.Err <- nil