containerd/supervisor.go

136 lines
3.2 KiB
Go

package containerd
import (
"os"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/rcrowley/go-metrics"
)
var (
containerStartTimer = metrics.NewTimer()
)
// NewSupervisor returns an initialized Process supervisor.
func NewSupervisor(stateDir string, concurrency int) (*Supervisor, error) {
if err := os.MkdirAll(stateDir, 0755); err != nil {
return nil, err
}
// register counters
metrics.DefaultRegistry.Register("container-start-time", containerStartTimer)
runtime, err := NewRuntime(stateDir)
if err != nil {
return nil, err
}
s := &Supervisor{
stateDir: stateDir,
processes: make(map[int]Container),
containers: make(map[string]Container),
runtime: runtime,
jobs: make(chan Job, 1024),
}
for i := 0; i < concurrency; i++ {
s.workerGroup.Add(1)
go s.worker(i)
}
return s, nil
}
type Supervisor struct {
// stateDir is the directory on the system to store container runtime state information.
stateDir string
processes map[int]Container
containers map[string]Container
runtime Runtime
events chan Event
jobs chan Job
workerGroup sync.WaitGroup
}
// Start is a non-blocking call that runs the supervisor for monitoring contianer processes and
// executing new containers.
//
// This event loop is the only thing that is allowed to modify state of containers and processes.
func (s *Supervisor) Start(events chan Event) error {
if events == nil {
return ErrEventChanNil
}
s.events = events
go func() {
for evt := range events {
logrus.WithField("event", evt).Debug("containerd: processing event")
switch e := evt.(type) {
case *ExitEvent:
logrus.WithFields(logrus.Fields{
"pid": e.Pid,
"status": e.Status,
}).Debug("containerd: process exited")
if container, ok := s.processes[e.Pid]; ok {
container.SetExited(e.Status)
delete(s.processes, e.Pid)
delete(s.containers, container.ID())
if err := container.Delete(); err != nil {
logrus.WithField("error", err).Error("containerd: deleting container")
}
}
case *StartedEvent:
s.containers[e.ID] = e.Container
pid, err := e.Container.Pid()
if err != nil {
logrus.WithField("error", err).Error("containerd: getting container pid")
continue
}
s.processes[pid] = e.Container
case *CreateContainerEvent:
j := &CreateJob{
ID: e.ID,
BundlePath: e.BundlePath,
Err: e.Err,
}
s.jobs <- j
}
}
}()
return nil
}
func (s *Supervisor) SendEvent(evt Event) {
s.events <- evt
}
// Stop initiates a shutdown of the supervisor killing all processes under supervision.
func (s *Supervisor) Stop() {
}
func (s *Supervisor) worker(id int) {
defer func() {
s.workerGroup.Done()
logrus.WithField("worker", id).Debug("containerd: worker finished")
}()
logrus.WithField("worker", id).Debug("containerd: starting worker")
for job := range s.jobs {
switch j := job.(type) {
case *CreateJob:
start := time.Now()
container, err := s.runtime.Create(j.ID, j.BundlePath)
if err != nil {
j.Err <- err
continue
}
s.SendEvent(&StartedEvent{
ID: j.ID,
Container: container,
})
j.Err <- nil
containerStartTimer.UpdateSince(start)
}
}
}