Add basic checkpoint and restore support
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
c1eb9ac90b
commit
ae9b2bafd5
9 changed files with 231 additions and 29 deletions
|
@ -19,17 +19,25 @@ func NewServer(supervisor *containerd.Supervisor) http.Handler {
|
||||||
supervisor: supervisor,
|
supervisor: supervisor,
|
||||||
r: r,
|
r: r,
|
||||||
}
|
}
|
||||||
// TODO: add container stats
|
// process handlers
|
||||||
// TODO: add container checkpoint
|
|
||||||
// TODO: add container restore
|
|
||||||
// TODO: set prctl child subreaper
|
|
||||||
r.HandleFunc("/containers/{id:.*}/process/{pid:.*}", s.signalPid).Methods("POST")
|
r.HandleFunc("/containers/{id:.*}/process/{pid:.*}", s.signalPid).Methods("POST")
|
||||||
r.HandleFunc("/containers/{id:.*}/process", s.addProcess).Methods("PUT")
|
r.HandleFunc("/containers/{id:.*}/process", s.addProcess).Methods("PUT")
|
||||||
|
|
||||||
|
// checkpoint and restore handlers
|
||||||
|
// TODO: decide whether to add a PUT handler for registering an existing checkpoint with containerd
|
||||||
|
r.HandleFunc("/containers/{id:.*}/checkpoint/{name:.*}", s.createCheckpoint).Methods("POST")
|
||||||
|
// r.HandleFunc("/containers/{id:.*}/checkpoint/{cid:.*}", s.deleteCheckpoint).Methods("DELETE")
|
||||||
|
r.HandleFunc("/containers/{id:.*}/checkpoint", s.listCheckpoints).Methods("GET")
|
||||||
|
|
||||||
|
// container handlers
|
||||||
r.HandleFunc("/containers/{id:.*}", s.createContainer).Methods("POST")
|
r.HandleFunc("/containers/{id:.*}", s.createContainer).Methods("POST")
|
||||||
r.HandleFunc("/containers/{id:.*}", s.updateContainer).Methods("PATCH")
|
r.HandleFunc("/containers/{id:.*}", s.updateContainer).Methods("PATCH")
|
||||||
|
|
||||||
// internal method for replaying the journal
|
// internal method for replaying the journal
|
||||||
r.HandleFunc("/event", s.event).Methods("POST")
|
r.HandleFunc("/event", s.event).Methods("POST")
|
||||||
r.HandleFunc("/events", s.events).Methods("GET")
|
r.HandleFunc("/events", s.events).Methods("GET")
|
||||||
|
|
||||||
|
// containerd handlers
|
||||||
r.HandleFunc("/state", s.state).Methods("GET")
|
r.HandleFunc("/state", s.state).Methods("GET")
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
@ -252,6 +260,12 @@ func (s *server) createContainer(w http.ResponseWriter, r *http.Request) {
|
||||||
e := containerd.NewEvent(containerd.StartContainerEventType)
|
e := containerd.NewEvent(containerd.StartContainerEventType)
|
||||||
e.ID = id
|
e.ID = id
|
||||||
e.BundlePath = c.BundlePath
|
e.BundlePath = c.BundlePath
|
||||||
|
if c.Checkpoint != nil {
|
||||||
|
e.Checkpoint = &runtime.Checkpoint{
|
||||||
|
Name: c.Checkpoint.Name,
|
||||||
|
Path: c.Checkpoint.Path,
|
||||||
|
}
|
||||||
|
}
|
||||||
e.Stdio = &runtime.Stdio{
|
e.Stdio = &runtime.Stdio{
|
||||||
Stderr: c.Stderr,
|
Stderr: c.Stderr,
|
||||||
Stdout: c.Stdout,
|
Stdout: c.Stdout,
|
||||||
|
@ -267,3 +281,71 @@ func (s *server) createContainer(w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
w.WriteHeader(http.StatusCreated)
|
w.WriteHeader(http.StatusCreated)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *server) listCheckpoints(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
e := containerd.NewEvent(containerd.GetContainerEventType)
|
||||||
|
s.supervisor.SendEvent(e)
|
||||||
|
if err := <-e.Err; err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var container runtime.Container
|
||||||
|
for _, c := range e.Containers {
|
||||||
|
if c.ID() == id {
|
||||||
|
container = c
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if container == nil {
|
||||||
|
http.Error(w, "container not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
checkpoints, err := container.Checkpoints()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out := []Checkpoint{}
|
||||||
|
for _, c := range checkpoints {
|
||||||
|
out = append(out, Checkpoint{
|
||||||
|
Path: c.Path,
|
||||||
|
Name: c.Name,
|
||||||
|
Tcp: c.Tcp,
|
||||||
|
Shell: c.Shell,
|
||||||
|
UnixSockets: c.UnixSockets,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *server) createCheckpoint(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var (
|
||||||
|
vars = mux.Vars(r)
|
||||||
|
id = vars["id"]
|
||||||
|
name = vars["name"]
|
||||||
|
)
|
||||||
|
var cp Checkpoint
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&cp); err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
e := containerd.NewEvent(containerd.CreateCheckpointEventType)
|
||||||
|
e.ID = id
|
||||||
|
e.Checkpoint = &runtime.Checkpoint{
|
||||||
|
Name: name,
|
||||||
|
Path: cp.Path,
|
||||||
|
Running: cp.Running,
|
||||||
|
Tcp: cp.Tcp,
|
||||||
|
UnixSockets: cp.UnixSockets,
|
||||||
|
Shell: cp.Shell,
|
||||||
|
}
|
||||||
|
s.supervisor.SendEvent(e)
|
||||||
|
if err := <-e.Err; err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *server) deleteCheckpoint(w http.ResponseWriter, r *http.Request) {
|
||||||
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ type Container struct {
|
||||||
Stdout string `json:"stdout,omitempty"`
|
Stdout string `json:"stdout,omitempty"`
|
||||||
Stderr string `json:"stderr,omitempty"`
|
Stderr string `json:"stderr,omitempty"`
|
||||||
State *ContainerState `json:"state,omitempty"`
|
State *ContainerState `json:"state,omitempty"`
|
||||||
|
Checkpoint *Checkpoint `json:"checkpoint,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type User struct {
|
type User struct {
|
||||||
|
@ -54,3 +55,12 @@ type Event struct {
|
||||||
ID string `json:"id,omitempty"`
|
ID string `json:"id,omitempty"`
|
||||||
Status int `json:"status,omitempty"`
|
Status int `json:"status,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checkpoint is the API representation of a container checkpoint. It is used
// as the request body when creating a checkpoint, as the element type when
// listing checkpoints, and inside Container to request a restore on create.
type Checkpoint struct {
	// Name identifies the checkpoint. When creating a checkpoint the name
	// comes from the request URL and any value in the body is ignored.
	Name string `json:"name,omitempty"`
	// Path is the directory holding the checkpoint images. When empty, the
	// runtime falls back to "<container path>/checkpoints/<name>".
	Path string `json:"path"`
	// Running is forwarded to the runtime as the CRIU "leave running" option.
	Running bool `json:"running,omitempty"`
	// Tcp is forwarded to the runtime as the CRIU "TCP established" option.
	Tcp bool `json:"tcp"`
	// UnixSockets is forwarded to the runtime as the CRIU "external unix
	// connections" option.
	UnixSockets bool `json:"unixSockets"`
	// Shell is forwarded to the runtime as the CRIU "shell job" option.
	Shell bool `json:"shell"`
}
|
||||||
|
|
13
checkpoint.go
Normal file
13
checkpoint.go
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
package containerd
|
||||||
|
|
||||||
|
type CreateCheckpointEvent struct {
|
||||||
|
s *Supervisor
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *CreateCheckpointEvent) Handle(e *Event) error {
|
||||||
|
container, ok := h.s.containers[e.ID]
|
||||||
|
if !ok {
|
||||||
|
return ErrContainerNotFound
|
||||||
|
}
|
||||||
|
return container.Checkpoint(*e.Checkpoint)
|
||||||
|
}
|
2
event.go
2
event.go
|
@ -19,6 +19,7 @@ const (
|
||||||
SignalEventType EventType = "signal"
|
SignalEventType EventType = "signal"
|
||||||
AddProcessEventType EventType = "addProcess"
|
AddProcessEventType EventType = "addProcess"
|
||||||
UpdateContainerEventType EventType = "updateContainer"
|
UpdateContainerEventType EventType = "updateContainer"
|
||||||
|
CreateCheckpointEventType EventType = "createCheckpoint"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewEvent(t EventType) *Event {
|
func NewEvent(t EventType) *Event {
|
||||||
|
@ -41,6 +42,7 @@ type Event struct {
|
||||||
Process *specs.Process `json:"process,omitempty"`
|
Process *specs.Process `json:"process,omitempty"`
|
||||||
State *runtime.State `json:"state,omitempty"`
|
State *runtime.State `json:"state,omitempty"`
|
||||||
Containers []runtime.Container `json:"-"`
|
Containers []runtime.Container `json:"-"`
|
||||||
|
Checkpoint *runtime.Checkpoint `json:"checkpoint,omitempty"`
|
||||||
Err chan error `json:"-"`
|
Err chan error `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/docker/containerd/runtime"
|
"github.com/docker/containerd/runtime"
|
||||||
"github.com/opencontainers/runc/libcontainer"
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
|
@ -190,6 +191,57 @@ type libcontainerContainer struct {
|
||||||
exitStatus int
|
exitStatus int
|
||||||
exited bool
|
exited bool
|
||||||
path string
|
path string
|
||||||
|
checkpoints map[string]runtime.Checkpoint
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *libcontainerContainer) Checkpoints() ([]runtime.Checkpoint, error) {
|
||||||
|
out := []runtime.Checkpoint{}
|
||||||
|
for _, cp := range c.checkpoints {
|
||||||
|
out = append(out, cp)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *libcontainerContainer) Checkpoint(cp runtime.Checkpoint) error {
|
||||||
|
opts := c.createCheckpointOpts(&cp)
|
||||||
|
if err := os.MkdirAll(opts.ImagesDirectory, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := c.c.Checkpoint(opts); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
cp.Timestamp = time.Now()
|
||||||
|
c.checkpoints[cp.Name] = cp
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *libcontainerContainer) createCheckpointOpts(cp *runtime.Checkpoint) *libcontainer.CriuOpts {
|
||||||
|
opts := libcontainer.CriuOpts{}
|
||||||
|
opts.LeaveRunning = cp.Running
|
||||||
|
opts.ShellJob = cp.Shell
|
||||||
|
opts.TcpEstablished = cp.Tcp
|
||||||
|
opts.ExternalUnixConnections = cp.UnixSockets
|
||||||
|
if cp.Path == "" {
|
||||||
|
cp.Path = filepath.Join(c.path, "checkpoints", cp.Name)
|
||||||
|
}
|
||||||
|
opts.ImagesDirectory = cp.Path
|
||||||
|
return &opts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *libcontainerContainer) Restore(path, name string) error {
|
||||||
|
if path == "" {
|
||||||
|
path = filepath.Join(c.path, "checkpoints", name)
|
||||||
|
}
|
||||||
|
var opts libcontainer.CriuOpts
|
||||||
|
if cp, ok := c.checkpoints[name]; ok {
|
||||||
|
opts = *c.createCheckpointOpts(&cp)
|
||||||
|
} else {
|
||||||
|
opts.ImagesDirectory = path
|
||||||
|
}
|
||||||
|
if err := c.c.Restore(c.initProcess.process, &opts); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *libcontainerContainer) Resume() error {
|
func (c *libcontainerContainer) Resume() error {
|
||||||
|
@ -300,6 +352,7 @@ func (r *libcontainerRuntime) Create(id, bundlePath string, stdio *runtime.Stdio
|
||||||
c := &libcontainerContainer{
|
c := &libcontainerContainer{
|
||||||
c: container,
|
c: container,
|
||||||
additionalProcesses: make(map[int]*libcontainerProcess),
|
additionalProcesses: make(map[int]*libcontainerProcess),
|
||||||
|
checkpoints: make(map[string]runtime.Checkpoint),
|
||||||
initProcess: &libcontainerProcess{
|
initProcess: &libcontainerProcess{
|
||||||
process: process,
|
process: process,
|
||||||
spec: spec.Process,
|
spec: spec.Process,
|
||||||
|
|
|
@ -2,6 +2,7 @@ package runtime
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/opencontainers/specs"
|
"github.com/opencontainers/specs"
|
||||||
)
|
)
|
||||||
|
@ -11,6 +12,7 @@ type Process interface {
|
||||||
Spec() specs.Process
|
Spec() specs.Process
|
||||||
Signal(os.Signal) error
|
Signal(os.Signal) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type Status string
|
type Status string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -27,6 +29,16 @@ type Stdio struct {
|
||||||
Stdout string `json:"stdout,omitempty"`
|
Stdout string `json:"stdout,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checkpoint describes a container checkpoint and the options used to take it.
type Checkpoint struct {
	// Timestamp is when the checkpoint was taken; the runtime sets it after a
	// successful checkpoint.
	//
	// Note: encoding/json's omitempty has no effect on struct values such as
	// time.Time, so a zero Timestamp is still emitted.
	Timestamp time.Time `json:"timestamp,omitempty"`
	// Path is the directory that holds the checkpoint images.
	Path string `json:"path,omitempty"`
	// Name identifies the checkpoint within its container.
	Name string `json:"name,omitempty"`
	// Tcp maps to the CRIU "TCP established" option.
	Tcp bool `json:"tcp"`
	// UnixSockets maps to the CRIU "external unix connections" option.
	UnixSockets bool `json:"unixSockets"`
	// Shell maps to the CRIU "shell job" option.
	Shell bool `json:"shell"`
	// Running maps to the CRIU "leave running" option.
	Running bool `json:"running,omitempty"`
}
|
||||||
|
|
||||||
type Container interface {
|
type Container interface {
|
||||||
// ID returns the container ID
|
// ID returns the container ID
|
||||||
ID() string
|
ID() string
|
||||||
|
@ -50,4 +62,10 @@ type Container interface {
|
||||||
Resume() error
|
Resume() error
|
||||||
// Pause pauses a running container
|
// Pause pauses a running container
|
||||||
Pause() error
|
Pause() error
|
||||||
|
|
||||||
|
Checkpoints() ([]Checkpoint, error)
|
||||||
|
|
||||||
|
Checkpoint(Checkpoint) error
|
||||||
|
|
||||||
|
Restore(path, name string) error
|
||||||
}
|
}
|
||||||
|
|
9
start.go
9
start.go
|
@ -11,9 +11,16 @@ func (h *StartEvent) Handle(e *Event) error {
|
||||||
}
|
}
|
||||||
h.s.containers[e.ID] = container
|
h.s.containers[e.ID] = container
|
||||||
ContainersCounter.Inc(1)
|
ContainersCounter.Inc(1)
|
||||||
h.s.tasks <- &StartTask{
|
task := &StartTask{
|
||||||
Err: e.Err,
|
Err: e.Err,
|
||||||
Container: container,
|
Container: container,
|
||||||
}
|
}
|
||||||
|
if e.Checkpoint != nil {
|
||||||
|
task.Checkpoint = &Checkpoint{
|
||||||
|
Name: e.Checkpoint.Name,
|
||||||
|
Path: e.Checkpoint.Path,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.s.tasks <- task
|
||||||
return errDeferedResponse
|
return errDeferedResponse
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@ func NewSupervisor(stateDir string, tasks chan *StartTask) (*Supervisor, error)
|
||||||
SignalEventType: &SignalEvent{s},
|
SignalEventType: &SignalEvent{s},
|
||||||
AddProcessEventType: &AddProcessEvent{s},
|
AddProcessEventType: &AddProcessEvent{s},
|
||||||
UpdateContainerEventType: &UpdateEvent{s},
|
UpdateContainerEventType: &UpdateEvent{s},
|
||||||
|
CreateCheckpointEventType: &CreateCheckpointEvent{s},
|
||||||
}
|
}
|
||||||
// start the container workers for concurrent container starts
|
// start the container workers for concurrent container starts
|
||||||
return s, nil
|
return s, nil
|
||||||
|
|
16
worker.go
16
worker.go
|
@ -11,8 +11,14 @@ type Worker interface {
|
||||||
Start()
|
Start()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checkpoint identifies the checkpoint a StartTask should restore from. Both
// fields are handed to the container's Restore method by the worker.
type Checkpoint struct {
	// Path is the directory holding the checkpoint images; may be empty.
	Path string
	// Name is the name of the checkpoint to restore.
	Name string
}
|
||||||
|
|
||||||
type StartTask struct {
|
type StartTask struct {
|
||||||
Container runtime.Container
|
Container runtime.Container
|
||||||
|
Checkpoint *Checkpoint
|
||||||
Err chan error
|
Err chan error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,6 +38,15 @@ func (w *worker) Start() {
|
||||||
defer w.wg.Done()
|
defer w.wg.Done()
|
||||||
for t := range w.s.tasks {
|
for t := range w.s.tasks {
|
||||||
started := time.Now()
|
started := time.Now()
|
||||||
|
if t.Checkpoint != nil {
|
||||||
|
if err := t.Container.Restore(t.Checkpoint.Path, t.Checkpoint.Name); err != nil {
|
||||||
|
evt := NewEvent(DeleteEventType)
|
||||||
|
evt.ID = t.Container.ID()
|
||||||
|
w.s.SendEvent(evt)
|
||||||
|
t.Err <- err
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
if err := t.Container.Start(); err != nil {
|
if err := t.Container.Start(); err != nil {
|
||||||
evt := NewEvent(DeleteEventType)
|
evt := NewEvent(DeleteEventType)
|
||||||
evt.ID = t.Container.ID()
|
evt.ID = t.Container.ID()
|
||||||
|
@ -39,6 +54,7 @@ func (w *worker) Start() {
|
||||||
t.Err <- err
|
t.Err <- err
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
}
|
||||||
ContainerStartTimer.UpdateSince(started)
|
ContainerStartTimer.UpdateSince(started)
|
||||||
t.Err <- nil
|
t.Err <- nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue