Move supervisor to it's own package

It allows to keep main namespace cleaner

Signed-off-by: Alexander Morozov <lk4d4@docker.com>
This commit is contained in:
Alexander Morozov 2015-12-17 16:07:04 -08:00
parent b296d50493
commit 69f8f566a2
24 changed files with 61 additions and 59 deletions

36
supervisor/add_process.go Normal file
View file

@ -0,0 +1,36 @@
package supervisor
import "github.com/Sirupsen/logrus"
type AddProcessEvent struct {
s *Supervisor
}
// TODO: add this to worker for concurrent starts??? maybe not because of races where the container
// could be stopped and removed...
func (h *AddProcessEvent) Handle(e *Event) error {
ci, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
p, io, err := h.s.runtime.StartProcess(ci.container, *e.Process, e.Console)
if err != nil {
return err
}
if e.Pid, err = p.Pid(); err != nil {
return err
}
h.s.processes[e.Pid] = &containerInfo{
container: ci.container,
}
l, err := h.s.copyIO(e.Stdin, e.Stdout, e.Stderr, io)
if err != nil {
// log the error but continue with the other commands
logrus.WithFields(logrus.Fields{
"error": err,
"id": e.ID,
}).Error("log stdio")
}
h.s.processes[e.Pid].copier = l
return nil
}

25
supervisor/checkpoint.go Normal file
View file

@ -0,0 +1,25 @@
package supervisor
type CreateCheckpointEvent struct {
s *Supervisor
}
func (h *CreateCheckpointEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
return i.container.Checkpoint(*e.Checkpoint)
}
type DeleteCheckpointEvent struct {
s *Supervisor
}
func (h *DeleteCheckpointEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
return i.container.DeleteCheckpoint(e.Checkpoint.Name)
}

31
supervisor/create.go Normal file
View file

@ -0,0 +1,31 @@
package supervisor
type StartEvent struct {
s *Supervisor
}
func (h *StartEvent) Handle(e *Event) error {
container, io, err := h.s.runtime.Create(e.ID, e.BundlePath, e.Console)
if err != nil {
return err
}
h.s.containerGroup.Add(1)
h.s.containers[e.ID] = &containerInfo{
container: container,
}
ContainersCounter.Inc(1)
task := &StartTask{
Err: e.Err,
IO: io,
Container: container,
Stdin: e.Stdin,
Stdout: e.Stdout,
Stderr: e.Stderr,
StartResponse: e.StartResponse,
}
if e.Checkpoint != nil {
task.Checkpoint = e.Checkpoint.Name
}
h.s.tasks <- task
return errDeferedResponse
}

37
supervisor/delete.go Normal file
View file

@ -0,0 +1,37 @@
package supervisor
import (
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/runtime"
)
type DeleteEvent struct {
s *Supervisor
}
func (h *DeleteEvent) Handle(e *Event) error {
if i, ok := h.s.containers[e.ID]; ok {
if err := h.deleteContainer(i.container); err != nil {
logrus.WithField("error", err).Error("containerd: deleting container")
}
if i.copier != nil {
if err := i.copier.Close(); err != nil {
logrus.WithField("error", err).Error("containerd: close container copier")
}
}
h.s.notifySubscribers(&Event{
Type: ExitEventType,
ID: e.ID,
Status: e.Status,
Pid: e.Pid,
})
ContainersCounter.Dec(1)
h.s.containerGroup.Done()
}
return nil
}
func (h *DeleteEvent) deleteContainer(container runtime.Container) error {
delete(h.s.containers, container.ID())
return container.Delete()
}

24
supervisor/errors.go Normal file
View file

@ -0,0 +1,24 @@
package supervisor
import "errors"
var (
// External errors
ErrEventChanNil = errors.New("containerd: event channel is nil")
ErrBundleNotFound = errors.New("containerd: bundle not found")
ErrContainerNotFound = errors.New("containerd: container not found")
ErrContainerExists = errors.New("containerd: container already exists")
ErrProcessNotFound = errors.New("containerd: processs not found for container")
ErrUnknownContainerStatus = errors.New("containerd: unknown container status ")
ErrUnknownEvent = errors.New("containerd: unknown event type")
// Internal errors
errShutdown = errors.New("containerd: supervisor is shutdown")
errRootNotAbs = errors.New("containerd: rootfs path is not an absolute path")
errNoContainerForPid = errors.New("containerd: pid not registered for any container")
// internal error where the handler will defer to another for the final response
//
// TODO: we could probably do a typed error with another error channel for this to make it
// less like magic
errDeferedResponse = errors.New("containerd: defered response")
)

84
supervisor/event.go Normal file
View file

@ -0,0 +1,84 @@
package supervisor
import (
"os"
"time"
"github.com/docker/containerd/runtime"
"github.com/opencontainers/specs"
)
type EventType string
const (
ExecExitEventType EventType = "execExit"
ExitEventType EventType = "exit"
StartContainerEventType EventType = "startContainer"
DeleteEventType EventType = "deleteContainerEvent"
GetContainerEventType EventType = "getContainer"
SignalEventType EventType = "signal"
AddProcessEventType EventType = "addProcess"
UpdateContainerEventType EventType = "updateContainer"
CreateCheckpointEventType EventType = "createCheckpoint"
DeleteCheckpointEventType EventType = "deleteCheckpoint"
StatsEventType EventType = "events"
UnsubscribeStatsEventType EventType = "unsubscribeStats"
StopStatsEventType EventType = "stopStats"
OOMEventType EventType = "oom"
)
func NewEvent(t EventType) *Event {
return &Event{
Type: t,
Timestamp: time.Now(),
Err: make(chan error, 1),
}
}
type StartResponse struct {
Pid int
}
type Event struct {
Type EventType
Timestamp time.Time
ID string
BundlePath string
Stdout string
Stderr string
Stdin string
Console string
Pid int
Status int
Signal os.Signal
Process *specs.Process
State *runtime.State
Containers []runtime.Container
Checkpoint *runtime.Checkpoint
Err chan error
StartResponse chan StartResponse
Stats chan interface{}
}
type Handler interface {
Handle(*Event) error
}
type commonEvent struct {
data *Event
sv *Supervisor
}
func (e *commonEvent) Handle() {
h, ok := e.sv.handlers[e.data.Type]
if !ok {
e.data.Err <- ErrUnknownEvent
return
}
err := h.Handle(e.data)
if err != errDeferedResponse {
e.data.Err <- err
close(e.data.Err)
return
}
}

58
supervisor/exit.go Normal file
View file

@ -0,0 +1,58 @@
package supervisor
import "github.com/Sirupsen/logrus"
type ExitEvent struct {
s *Supervisor
}
func (h *ExitEvent) Handle(e *Event) error {
logrus.WithFields(logrus.Fields{"pid": e.Pid, "status": e.Status}).
Debug("containerd: process exited")
// is it the child process of a container
if info, ok := h.s.processes[e.Pid]; ok {
ne := NewEvent(ExecExitEventType)
ne.ID = info.container.ID()
ne.Pid = e.Pid
ne.Status = e.Status
h.s.SendEvent(ne)
return nil
}
// is it the main container's process
container, err := h.s.getContainerForPid(e.Pid)
if err != nil {
if err != errNoContainerForPid {
logrus.WithField("error", err).Error("containerd: find containers main pid")
}
return nil
}
container.SetExited(e.Status)
ne := NewEvent(DeleteEventType)
ne.ID = container.ID()
ne.Pid = e.Pid
ne.Status = e.Status
h.s.SendEvent(ne)
stopCollect := NewEvent(StopStatsEventType)
stopCollect.ID = container.ID()
h.s.SendEvent(stopCollect)
return nil
}
type ExecExitEvent struct {
s *Supervisor
}
func (h *ExecExitEvent) Handle(e *Event) error {
// exec process: we remove this process without notifying the main event loop
info := h.s.processes[e.Pid]
if err := info.container.RemoveProcess(e.Pid); err != nil {
logrus.WithField("error", err).Error("containerd: find container for pid")
}
if err := info.copier.Close(); err != nil {
logrus.WithField("error", err).Error("containerd: close process IO")
}
delete(h.s.processes, e.Pid)
h.s.notifySubscribers(e)
return nil
}

View file

@ -0,0 +1,12 @@
package supervisor
type GetContainersEvent struct {
s *Supervisor
}
func (h *GetContainersEvent) Handle(e *Event) error {
for _, i := range h.s.containers {
e.Containers = append(e.Containers, i.container)
}
return nil
}

66
supervisor/io.go Normal file
View file

@ -0,0 +1,66 @@
package supervisor
import (
"io"
"os"
)
type ioConfig struct {
StdoutPath string
StderrPath string
StdinPath string
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
}
func newCopier(i *ioConfig) (*copier, error) {
l := &copier{
config: i,
}
if i.StdinPath != "" {
f, err := os.OpenFile(i.StdinPath, os.O_RDONLY, 0)
if err != nil {
return nil, err
}
l.closers = append(l.closers, f)
go func() {
io.Copy(i.Stdin, f)
i.Stdin.Close()
}()
}
if i.StdoutPath != "" {
f, err := os.OpenFile(i.StdoutPath, os.O_RDWR, 0)
if err != nil {
return nil, err
}
l.closers = append(l.closers, f)
go io.Copy(f, i.Stdout)
}
if i.StderrPath != "" {
f, err := os.OpenFile(i.StderrPath, os.O_RDWR, 0)
if err != nil {
return nil, err
}
l.closers = append(l.closers, f)
go io.Copy(f, i.Stderr)
}
return l, nil
}
type copier struct {
config *ioConfig
closers []io.Closer
}
func (l *copier) Close() (err error) {
for _, c := range append(l.closers, l.config.Stdin, l.config.Stdout, l.config.Stderr) {
if c != nil {
if cerr := c.Close(); err == nil {
err = cerr
}
}
}
return err
}

26
supervisor/machine.go Normal file
View file

@ -0,0 +1,26 @@
package supervisor
import "github.com/cloudfoundry/gosigar"
type Machine struct {
ID string
Cpus int
Memory int64
}
func CollectMachineInformation(id string) (Machine, error) {
m := Machine{
ID: id,
}
cpu := sigar.CpuList{}
if err := cpu.Get(); err != nil {
return m, err
}
m.Cpus = len(cpu.List)
mem := sigar.Mem{}
if err := mem.Get(); err != nil {
return m, err
}
m.Memory = int64(mem.Total)
return m, nil
}

80
supervisor/oom.go Normal file
View file

@ -0,0 +1,80 @@
package supervisor
import (
"reflect"
"sync"
)
func newNotifier(s *Supervisor) *notifier {
n := &notifier{
s: s,
channels: make(map[<-chan struct{}]string),
controller: make(chan struct{}),
}
go n.start()
return n
}
type notifier struct {
m sync.Mutex
channels map[<-chan struct{}]string
controller chan struct{}
s *Supervisor
}
func (n *notifier) start() {
for {
c := n.createCase()
i, _, ok := reflect.Select(c)
if i == 0 {
continue
}
if ok {
ch := c[i].Chan.Interface().(<-chan struct{})
id := n.channels[ch]
e := NewEvent(OOMEventType)
e.ID = id
n.s.SendEvent(e)
continue
}
// the channel was closed and we should remove it
ch := c[i].Chan.Interface().(<-chan struct{})
n.removeChan(ch)
}
}
func (n *notifier) Add(ch <-chan struct{}, id string) {
n.m.Lock()
n.channels[ch] = id
n.m.Unlock()
// signal the main loop to break and add the new
// channels
n.controller <- struct{}{}
}
func (n *notifier) createCase() []reflect.SelectCase {
var out []reflect.SelectCase
// add controller chan so that we can signal when we need to make
// changes in the select. The controller chan will always be at
// index 0 in the slice
out = append(out, reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(n.controller),
})
n.m.Lock()
for ch := range n.channels {
v := reflect.ValueOf(ch)
out = append(out, reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: v,
})
}
n.m.Unlock()
return out
}
func (n *notifier) removeChan(ch <-chan struct{}) {
n.m.Lock()
delete(n.channels, ch)
n.m.Unlock()
}

22
supervisor/signal.go Normal file
View file

@ -0,0 +1,22 @@
package supervisor
type SignalEvent struct {
s *Supervisor
}
func (h *SignalEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
processes, err := i.container.Processes()
if err != nil {
return err
}
for _, p := range processes {
if pid, err := p.Pid(); err == nil && pid == e.Pid {
return p.Signal(e.Signal)
}
}
return ErrProcessNotFound
}

58
supervisor/stats.go Normal file
View file

@ -0,0 +1,58 @@
package supervisor
import "github.com/rcrowley/go-metrics"
var (
ContainerStartTimer = metrics.NewTimer()
ContainersCounter = metrics.NewCounter()
EventsCounter = metrics.NewCounter()
EventSubscriberCounter = metrics.NewCounter()
)
func Metrics() map[string]interface{} {
return map[string]interface{}{
"container-start-time": ContainerStartTimer,
"containers": ContainersCounter,
"events": EventsCounter,
"events-subscribers": EventSubscriberCounter,
}
}
type StatsEvent struct {
s *Supervisor
}
type UnsubscribeStatsEvent struct {
s *Supervisor
}
type StopStatsEvent struct {
s *Supervisor
}
func (h *StatsEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
e.Stats = h.s.statsCollector.collect(i.container)
return nil
}
func (h *UnsubscribeStatsEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
h.s.statsCollector.unsubscribe(i.container, e.Stats)
return nil
}
func (h *StopStatsEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
h.s.statsCollector.stopCollection(i.container)
return nil
}

View file

@ -0,0 +1,240 @@
package supervisor
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/api/grpc/types"
"github.com/docker/containerd/runtime"
"github.com/docker/docker/pkg/pubsub"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/system"
)
func convertBlkioEntryToPb(b []cgroups.BlkioStatEntry) []*types.BlkioStatsEntry {
var pbEs []*types.BlkioStatsEntry
for _, e := range b {
pbEs = append(pbEs, &types.BlkioStatsEntry{
Major: e.Major,
Minor: e.Minor,
Op: e.Op,
Value: e.Value,
})
}
return pbEs
}
func convertToPb(st *runtime.Stat) *types.Stats {
pbSt := &types.Stats{
Timestamp: uint64(st.Timestamp.Unix()),
CgroupStats: &types.CgroupStats{},
}
lcSt, ok := st.Data.(*libcontainer.Stats)
if !ok {
return pbSt
}
cpuSt := lcSt.CgroupStats.CpuStats
pbSt.CgroupStats.CpuStats = &types.CpuStats{
CpuUsage: &types.CpuUsage{
TotalUsage: cpuSt.CpuUsage.TotalUsage,
PercpuUsage: cpuSt.CpuUsage.PercpuUsage,
UsageInKernelmode: cpuSt.CpuUsage.UsageInKernelmode,
UsageInUsermode: cpuSt.CpuUsage.UsageInUsermode,
},
ThrottlingData: &types.ThrottlingData{
Periods: cpuSt.ThrottlingData.Periods,
ThrottledPeriods: cpuSt.ThrottlingData.ThrottledPeriods,
ThrottledTime: cpuSt.ThrottlingData.ThrottledTime,
},
}
memSt := lcSt.CgroupStats.MemoryStats
pbSt.CgroupStats.MemoryStats = &types.MemoryStats{
Cache: memSt.Cache,
Usage: &types.MemoryData{
Usage: memSt.Usage.Usage,
MaxUsage: memSt.Usage.MaxUsage,
Failcnt: memSt.Usage.Failcnt,
},
SwapUsage: &types.MemoryData{
Usage: memSt.SwapUsage.Usage,
MaxUsage: memSt.SwapUsage.MaxUsage,
Failcnt: memSt.SwapUsage.Failcnt,
},
}
blkSt := lcSt.CgroupStats.BlkioStats
pbSt.CgroupStats.BlkioStats = &types.BlkioStats{
IoServiceBytesRecursive: convertBlkioEntryToPb(blkSt.IoServiceBytesRecursive),
IoServicedRecursive: convertBlkioEntryToPb(blkSt.IoServicedRecursive),
IoQueuedRecursive: convertBlkioEntryToPb(blkSt.IoQueuedRecursive),
IoServiceTimeRecursive: convertBlkioEntryToPb(blkSt.IoServiceTimeRecursive),
IoWaitTimeRecursive: convertBlkioEntryToPb(blkSt.IoWaitTimeRecursive),
IoMergedRecursive: convertBlkioEntryToPb(blkSt.IoMergedRecursive),
IoTimeRecursive: convertBlkioEntryToPb(blkSt.IoTimeRecursive),
SectorsRecursive: convertBlkioEntryToPb(blkSt.SectorsRecursive),
}
pbSt.CgroupStats.HugetlbStats = make(map[string]*types.HugetlbStats)
for k, st := range lcSt.CgroupStats.HugetlbStats {
pbSt.CgroupStats.HugetlbStats[k] = &types.HugetlbStats{
Usage: st.Usage,
MaxUsage: st.MaxUsage,
Failcnt: st.Failcnt,
}
}
return pbSt
}
type statsPair struct {
ct runtime.Container
pub *pubsub.Publisher
}
func newStatsCollector(interval time.Duration) *statsCollector {
s := &statsCollector{
interval: interval,
clockTicksPerSecond: uint64(system.GetClockTicks()),
bufReader: bufio.NewReaderSize(nil, 128),
publishers: make(map[string]*statsPair),
}
go s.run()
return s
}
// statsCollector manages and provides container resource stats
type statsCollector struct {
m sync.Mutex
supervisor *Supervisor
interval time.Duration
clockTicksPerSecond uint64
publishers map[string]*statsPair
bufReader *bufio.Reader
}
// collect registers the container with the collector and adds it to
// the event loop for collection on the specified interval returning
// a channel for the subscriber to receive on.
func (s *statsCollector) collect(c runtime.Container) chan interface{} {
s.m.Lock()
defer s.m.Unlock()
publisher, exists := s.publishers[c.ID()]
if !exists {
pub := pubsub.NewPublisher(100*time.Millisecond, 1024)
publisher = &statsPair{ct: c, pub: pub}
s.publishers[c.ID()] = publisher
}
return publisher.pub.Subscribe()
}
// stopCollection closes the channels for all subscribers and removes
// the container from metrics collection.
func (s *statsCollector) stopCollection(c runtime.Container) {
s.m.Lock()
if publisher, exists := s.publishers[c.ID()]; exists {
publisher.pub.Close()
delete(s.publishers, c.ID())
}
s.m.Unlock()
}
// unsubscribe removes a specific subscriber from receiving updates for a container's stats.
func (s *statsCollector) unsubscribe(c runtime.Container, ch chan interface{}) {
s.m.Lock()
publisher := s.publishers[c.ID()]
if publisher != nil {
publisher.pub.Evict(ch)
if publisher.pub.Len() == 0 {
delete(s.publishers, c.ID())
}
}
s.m.Unlock()
}
func (s *statsCollector) run() {
type publishersPair struct {
container runtime.Container
publisher *pubsub.Publisher
}
// we cannot determine the capacity here.
// it will grow enough in first iteration
var pairs []*statsPair
for range time.Tick(s.interval) {
// it does not make sense in the first iteration,
// but saves allocations in further iterations
pairs = pairs[:0]
s.m.Lock()
for _, publisher := range s.publishers {
// copy pointers here to release the lock ASAP
pairs = append(pairs, publisher)
}
s.m.Unlock()
if len(pairs) == 0 {
continue
}
for _, pair := range pairs {
stats, err := pair.ct.Stats()
if err != nil {
logrus.Errorf("Error getting stats for container ID %s", pair.ct.ID())
continue
}
pair.pub.Publish(convertToPb(stats))
}
}
}
const nanoSecondsPerSecond = 1e9
// getSystemCPUUsage returns the host system's cpu usage in
// nanoseconds. An error is returned if the format of the underlying
// file does not match.
//
// Uses /proc/stat defined by POSIX. Looks for the cpu
// statistics line and then sums up the first seven fields
// provided. See `man 5 proc` for details on specific field
// information.
func (s *statsCollector) getSystemCPUUsage() (uint64, error) {
var line string
f, err := os.Open("/proc/stat")
if err != nil {
return 0, err
}
defer func() {
s.bufReader.Reset(nil)
f.Close()
}()
s.bufReader.Reset(f)
err = nil
for err == nil {
line, err = s.bufReader.ReadString('\n')
if err != nil {
break
}
parts := strings.Fields(line)
switch parts[0] {
case "cpu":
if len(parts) < 8 {
return 0, fmt.Errorf("bad format of cpu stats")
}
var totalClockTicks uint64
for _, i := range parts[1:8] {
v, err := strconv.ParseUint(i, 10, 64)
if err != nil {
return 0, fmt.Errorf("error parsing cpu stats")
}
totalClockTicks += v
}
return (totalClockTicks * nanoSecondsPerSecond) /
s.clockTicksPerSecond, nil
}
}
return 0, fmt.Errorf("bad stats format")
}

235
supervisor/supervisor.go Normal file
View file

@ -0,0 +1,235 @@
package supervisor
import (
"os"
"os/signal"
"path/filepath"
"sync"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/eventloop"
"github.com/docker/containerd/runtime"
"github.com/opencontainers/runc/libcontainer"
)
const (
statsInterval = 1 * time.Second
defaultBufferSize = 2048 // size of queue in eventloop
)
// New returns an initialized Process supervisor.
func New(id, stateDir string, tasks chan *StartTask, oom bool) (*Supervisor, error) {
if err := os.MkdirAll(stateDir, 0755); err != nil {
return nil, err
}
// register counters
r, err := newRuntime(filepath.Join(stateDir, id))
if err != nil {
return nil, err
}
machine, err := CollectMachineInformation(id)
if err != nil {
return nil, err
}
s := &Supervisor{
stateDir: stateDir,
containers: make(map[string]*containerInfo),
processes: make(map[int]*containerInfo),
runtime: r,
tasks: tasks,
machine: machine,
subscribers: make(map[chan *Event]struct{}),
statsCollector: newStatsCollector(statsInterval),
el: eventloop.NewChanLoop(defaultBufferSize),
}
if oom {
s.notifier = newNotifier(s)
}
// register default event handlers
s.handlers = map[EventType]Handler{
ExecExitEventType: &ExecExitEvent{s},
ExitEventType: &ExitEvent{s},
StartContainerEventType: &StartEvent{s},
DeleteEventType: &DeleteEvent{s},
GetContainerEventType: &GetContainersEvent{s},
SignalEventType: &SignalEvent{s},
AddProcessEventType: &AddProcessEvent{s},
UpdateContainerEventType: &UpdateEvent{s},
CreateCheckpointEventType: &CreateCheckpointEvent{s},
DeleteCheckpointEventType: &DeleteCheckpointEvent{s},
StatsEventType: &StatsEvent{s},
UnsubscribeStatsEventType: &UnsubscribeStatsEvent{s},
StopStatsEventType: &StopStatsEvent{s},
}
// start the container workers for concurrent container starts
return s, nil
}
type containerInfo struct {
container runtime.Container
copier *copier
}
type Supervisor struct {
// stateDir is the directory on the system to store container runtime state information.
stateDir string
containers map[string]*containerInfo
processes map[int]*containerInfo
handlers map[EventType]Handler
runtime runtime.Runtime
events chan *Event
tasks chan *StartTask
// we need a lock around the subscribers map only because additions and deletions from
// the map are via the API so we cannot really control the concurrency
subscriberLock sync.RWMutex
subscribers map[chan *Event]struct{}
machine Machine
containerGroup sync.WaitGroup
statsCollector *statsCollector
notifier *notifier
el eventloop.EventLoop
}
// Stop closes all tasks and sends a SIGTERM to each container's pid1 then waits for they to
// terminate. After it has handled all the SIGCHILD events it will close the signals chan
// and exit. Stop is a non-blocking call and will return after the containers have been signaled
func (s *Supervisor) Stop(sig chan os.Signal) {
// Close the tasks channel so that no new containers get started
close(s.tasks)
// send a SIGTERM to all containers
for id, i := range s.containers {
c := i.container
logrus.WithField("id", id).Debug("sending TERM to container processes")
procs, err := c.Processes()
if err != nil {
logrus.WithField("id", id).Warn("get container processes")
continue
}
if len(procs) == 0 {
continue
}
mainProc := procs[0]
if err := mainProc.Signal(syscall.SIGTERM); err != nil {
pid, _ := mainProc.Pid()
logrus.WithFields(logrus.Fields{
"id": id,
"pid": pid,
"error": err,
}).Error("send SIGTERM to process")
}
}
go func() {
logrus.Debug("waiting for containers to exit")
s.containerGroup.Wait()
logrus.Debug("all containers exited")
// stop receiving signals and close the channel
signal.Stop(sig)
close(sig)
}()
}
// Close closes any open files in the supervisor but expects that Stop has been
// callsed so that no more containers are started.
func (s *Supervisor) Close() error {
return nil
}
// Events returns an event channel that external consumers can use to receive updates
// on container events
func (s *Supervisor) Events() chan *Event {
s.subscriberLock.Lock()
defer s.subscriberLock.Unlock()
c := make(chan *Event, defaultBufferSize)
EventSubscriberCounter.Inc(1)
s.subscribers[c] = struct{}{}
return c
}
// Unsubscribe removes the provided channel from receiving any more events
func (s *Supervisor) Unsubscribe(sub chan *Event) {
s.subscriberLock.Lock()
defer s.subscriberLock.Unlock()
delete(s.subscribers, sub)
close(sub)
EventSubscriberCounter.Dec(1)
}
// notifySubscribers will send the provided event to the external subscribers
// of the events channel
func (s *Supervisor) notifySubscribers(e *Event) {
s.subscriberLock.RLock()
defer s.subscriberLock.RUnlock()
for sub := range s.subscribers {
// do a non-blocking send for the channel
select {
case sub <- e:
default:
logrus.WithField("event", e.Type).Warn("event not sent to subscriber")
}
}
}
// Start is a non-blocking call that runs the supervisor for monitoring contianer processes and
// executing new containers.
//
// This event loop is the only thing that is allowed to modify state of containers and processes
// therefore it is save to do operations in the handlers that modify state of the system or
// state of the Supervisor
func (s *Supervisor) Start() error {
logrus.WithFields(logrus.Fields{
"runtime": s.runtime.Type(),
"stateDir": s.stateDir,
}).Debug("Supervisor started")
return s.el.Start()
}
// Machine returns the machine information for which the
// supervisor is executing on.
func (s *Supervisor) Machine() Machine {
return s.machine
}
// getContainerForPid returns the container where the provided pid is the pid1 or main
// process in the container
func (s *Supervisor) getContainerForPid(pid int) (runtime.Container, error) {
for _, i := range s.containers {
container := i.container
cpid, err := container.Pid()
if err != nil {
if lerr, ok := err.(libcontainer.Error); ok {
if lerr.Code() == libcontainer.ProcessNotExecuted {
continue
}
}
logrus.WithField("error", err).Error("containerd: get container pid")
}
if pid == cpid {
return container, nil
}
}
return nil, errNoContainerForPid
}
// SendEvent sends the provided event the the supervisors main event loop
func (s *Supervisor) SendEvent(evt *Event) {
EventsCounter.Inc(1)
s.el.Send(&commonEvent{data: evt, sv: s})
}
func (s *Supervisor) copyIO(stdin, stdout, stderr string, i *runtime.IO) (*copier, error) {
config := &ioConfig{
Stdin: i.Stdin,
Stdout: i.Stdout,
Stderr: i.Stderr,
StdoutPath: stdout,
StderrPath: stderr,
StdinPath: stdin,
}
l, err := newCopier(config)
if err != nil {
return nil, err
}
return l, nil
}

View file

@ -0,0 +1,12 @@
// +build libcontainer
package supervisor
import (
"github.com/docker/containerd/linux"
"github.com/docker/containerd/runtime"
)
func newRuntime(stateDir string) (runtime.Runtime, error) {
return linux.NewRuntime(stateDir)
}

View file

@ -0,0 +1,12 @@
// +build runc
package supervisor
import (
"github.com/docker/containerd/runc"
"github.com/docker/containerd/runtime"
)
func newRuntime(stateDir string) (runtime.Runtime, error) {
return runc.NewRuntime(stateDir)
}

View file

@ -0,0 +1,13 @@
// +build !libcontainer,!runc
package supervisor
import (
"errors"
"github.com/docker/containerd/runtime"
)
func newRuntime(stateDir string) (runtime.Runtime, error) {
return nil, errors.New("unsupported platform")
}

41
supervisor/update.go Normal file
View file

@ -0,0 +1,41 @@
package supervisor
import "github.com/docker/containerd/runtime"
type UpdateEvent struct {
s *Supervisor
}
func (h *UpdateEvent) Handle(e *Event) error {
i, ok := h.s.containers[e.ID]
if !ok {
return ErrContainerNotFound
}
container := i.container
if e.State.Status != "" {
switch e.State.Status {
case runtime.Running:
if err := container.Resume(); err != nil {
return ErrUnknownContainerStatus
}
case runtime.Paused:
if err := container.Pause(); err != nil {
return ErrUnknownContainerStatus
}
default:
return ErrUnknownContainerStatus
}
}
if e.Signal != nil {
// signal the pid1/main process of the container
processes, err := container.Processes()
if err != nil {
return err
}
if len(processes) == 0 {
return ErrProcessNotFound
}
return processes[0].Signal(e.Signal)
}
return nil
}

86
supervisor/worker.go Normal file
View file

@ -0,0 +1,86 @@
package supervisor
import (
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/runtime"
)
type Worker interface {
Start()
}
type StartTask struct {
Container runtime.Container
Checkpoint string
IO *runtime.IO
Stdin string
Stdout string
Stderr string
Err chan error
StartResponse chan StartResponse
}
func NewWorker(s *Supervisor, wg *sync.WaitGroup) Worker {
return &worker{
s: s,
wg: wg,
}
}
type worker struct {
wg *sync.WaitGroup
s *Supervisor
}
func (w *worker) Start() {
defer w.wg.Done()
for t := range w.s.tasks {
started := time.Now()
l, err := w.s.copyIO(t.Stdin, t.Stdout, t.Stderr, t.IO)
if err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
w.s.containers[t.Container.ID()].copier = l
if t.Checkpoint != "" {
if err := t.Container.Restore(t.Checkpoint); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
} else {
if err := t.Container.Start(); err != nil {
evt := NewEvent(DeleteEventType)
evt.ID = t.Container.ID()
w.s.SendEvent(evt)
t.Err <- err
continue
}
}
pid, err := t.Container.Pid()
if err != nil {
logrus.WithField("error", err).Error("containerd: get container main pid")
}
if w.s.notifier != nil {
n, err := t.Container.OOM()
if err != nil {
logrus.WithField("error", err).Error("containerd: notify OOM events")
} else {
w.s.notifier.Add(n, t.Container.ID())
}
}
ContainerStartTimer.UpdateSince(started)
t.Err <- nil
t.StartResponse <- StartResponse{
Pid: pid,
}
}
}