containerd/supervisor.go

169 lines
4.1 KiB
Go
Raw Normal View History

2015-11-05 23:29:53 +00:00
package containerd
import (
"os"
2015-11-10 22:24:34 +00:00
"path/filepath"
goruntime "runtime"
2015-11-07 00:44:52 +00:00
"sync"
"time"
2015-11-05 23:29:53 +00:00
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/runtime"
2015-11-06 21:01:55 +00:00
"github.com/opencontainers/runc/libcontainer"
2015-11-05 23:29:53 +00:00
)
// NewSupervisor returns an initialized Process supervisor.
2015-11-05 23:49:13 +00:00
func NewSupervisor(stateDir string, concurrency int) (*Supervisor, error) {
2015-11-05 23:29:53 +00:00
if err := os.MkdirAll(stateDir, 0755); err != nil {
return nil, err
}
2015-11-06 00:16:11 +00:00
// register counters
r, err := newRuntime(stateDir)
2015-11-05 23:49:13 +00:00
if err != nil {
return nil, err
2015-11-05 23:29:53 +00:00
}
2015-11-10 22:24:34 +00:00
j, err := newJournal(filepath.Join(stateDir, "journal.json"))
if err != nil {
return nil, err
}
2015-11-05 23:49:13 +00:00
s := &Supervisor{
stateDir: stateDir,
containers: make(map[string]runtime.Container),
processes: make(map[int]runtime.Container),
runtime: r,
2015-11-07 00:44:52 +00:00
tasks: make(chan *startTask, concurrency*100),
2015-11-10 22:24:34 +00:00
journal: j,
2015-11-07 00:44:52 +00:00
}
// register default event handlers
s.handlers = map[EventType]Handler{
ExecExitEventType: &ExecExitEvent{s},
ExitEventType: &ExitEvent{s},
StartContainerEventType: &StartEvent{s},
DeleteEventType: &DeleteEvent{s},
GetContainerEventType: &GetContainersEvent{s},
SignalEventType: &SignalEvent{s},
AddProcessEventType: &AddProcessEvent{s},
UpdateContainerEventType: &UpdateEvent{s},
}
// start the container workers for concurrent container starts
2015-11-07 00:44:52 +00:00
for i := 0; i < concurrency; i++ {
s.workerGroup.Add(1)
go s.startContainerWorker(s.tasks)
2015-11-05 23:29:53 +00:00
}
return s, nil
}
type Supervisor struct {
// stateDir is the directory on the system to store container runtime state information.
stateDir string
containers map[string]runtime.Container
processes map[int]runtime.Container
handlers map[EventType]Handler
runtime runtime.Runtime
journal *journal
2015-11-10 22:24:34 +00:00
events chan *Event
2015-11-07 00:44:52 +00:00
tasks chan *startTask
workerGroup sync.WaitGroup
subscribers map[subscriber]bool
2015-11-05 23:29:53 +00:00
}
// subscriber is a channel of events; values of this type are used as the keys
// of Supervisor.subscribers.
type subscriber chan *Event
// need proper close logic for jobs and stuff so that sending to the channels dont panic
// but can complete jobs
2015-11-10 22:24:34 +00:00
func (s *Supervisor) Close() error {
//TODO: unsubscribe all channels
2015-11-10 22:24:34 +00:00
return s.journal.Close()
}
// Events creates a new unbuffered subscriber channel, registers it with the
// supervisor so that NotifySubscribers delivers to it, and returns it.
//
// The channel stays registered until it is passed to Unsubscribe. Because it
// is unbuffered, the holder must keep receiving from it or sends to it will
// block.
//
// NOTE(review): no locking is performed around the subscriber set here;
// confirm that callers are serialized with the other users of the set.
func (s *Supervisor) Events() subscriber {
	sub := subscriber(make(chan *Event))
	// Tolerate a Supervisor whose subscriber set was never allocated:
	// assigning into a nil map would panic.
	if s.subscribers == nil {
		s.subscribers = make(map[subscriber]bool)
	}
	s.subscribers[sub] = true
	return sub
}
// Unsubscribe removes sub from the supervisor's subscriber set. The channel
// itself is left open. Removing a channel that is not in the set is a no-op.
func (s *Supervisor) Unsubscribe(sub subscriber) {
	delete(s.subscribers, sub)
}
// NotifySubscribers sends e to every channel in the subscriber set, one after
// another. Each send is a plain blocking channel send, so the call does not
// return until every subscriber has accepted the event.
func (s *Supervisor) NotifySubscribers(e *Event) {
	for ch := range s.subscribers {
		ch <- e
	}
}
2015-11-06 00:40:57 +00:00
// Start is a non-blocking call that runs the supervisor for monitoring contianer processes and
2015-11-05 23:29:53 +00:00
// executing new containers.
//
// This event loop is the only thing that is allowed to modify state of containers and processes.
2015-11-10 22:24:34 +00:00
func (s *Supervisor) Start(events chan *Event) error {
2015-11-05 23:29:53 +00:00
if events == nil {
return ErrEventChanNil
}
s.events = events
2015-11-06 00:40:57 +00:00
go func() {
// allocate an entire thread to this goroutine for the main event loop
// so that nothing else is scheduled over the top of it.
goruntime.LockOSThread()
2015-11-10 22:24:34 +00:00
for e := range events {
2015-11-13 22:09:35 +00:00
s.journal.write(e)
h, ok := s.handlers[e.Type]
if !ok {
e.Err <- ErrUnknownEvent
continue
}
if err := h.Handle(e); err != nil {
if err != errDeferedResponse {
2015-11-10 21:44:35 +00:00
e.Err <- err
close(e.Err)
2015-11-10 21:44:35 +00:00
}
continue
2015-11-05 23:29:53 +00:00
}
close(e.Err)
2015-11-05 23:29:53 +00:00
}
2015-11-06 00:40:57 +00:00
}()
2015-11-05 23:29:53 +00:00
return nil
}
func (s *Supervisor) getContainerForPid(pid int) (runtime.Container, error) {
2015-11-06 21:01:55 +00:00
for _, container := range s.containers {
cpid, err := container.Pid()
if err != nil {
if lerr, ok := err.(libcontainer.Error); ok {
if lerr.Code() == libcontainer.ProcessNotExecuted {
continue
}
}
logrus.WithField("error", err).Error("containerd: get container pid")
}
if pid == cpid {
return container, nil
}
}
return nil, errNoContainerForPid
}
2015-11-10 22:24:34 +00:00
func (s *Supervisor) SendEvent(evt *Event) {
EventsCounter.Inc(1)
2015-11-05 23:29:53 +00:00
s.events <- evt
}
2015-11-07 00:44:52 +00:00
type startTask struct {
container runtime.Container
2015-11-07 00:44:52 +00:00
err chan error
}
func (s *Supervisor) startContainerWorker(tasks chan *startTask) {
defer s.workerGroup.Done()
for t := range tasks {
started := time.Now()
2015-11-07 00:44:52 +00:00
if err := t.container.Start(); err != nil {
2015-11-10 22:24:34 +00:00
e := NewEvent(StartContainerEventType)
e.ID = t.container.ID()
s.SendEvent(e)
2015-11-07 00:44:52 +00:00
t.err <- err
continue
}
ContainerStartTimer.UpdateSince(started)
2015-11-07 00:44:52 +00:00
t.err <- nil
}
}