2015-12-18 00:07:04 +00:00
|
|
|
package supervisor
|
2015-11-05 23:29:53 +00:00
|
|
|
|
|
|
|
import (
|
2016-02-12 18:17:59 +00:00
|
|
|
"encoding/json"
|
|
|
|
"io"
|
2016-01-06 21:32:46 +00:00
|
|
|
"io/ioutil"
|
2015-11-05 23:29:53 +00:00
|
|
|
"os"
|
2016-02-12 18:17:59 +00:00
|
|
|
"path/filepath"
|
2016-02-01 19:02:41 +00:00
|
|
|
"sort"
|
2015-12-04 21:31:17 +00:00
|
|
|
"sync"
|
2016-02-12 01:26:24 +00:00
|
|
|
"time"
|
2015-11-05 23:29:53 +00:00
|
|
|
|
|
|
|
"github.com/Sirupsen/logrus"
|
2015-12-18 20:17:53 +00:00
|
|
|
"github.com/docker/containerd/chanotify"
|
2015-12-16 21:53:11 +00:00
|
|
|
"github.com/docker/containerd/eventloop"
|
2015-12-01 19:56:08 +00:00
|
|
|
"github.com/docker/containerd/runtime"
|
2015-11-05 23:29:53 +00:00
|
|
|
)
|
|
|
|
|
2015-12-18 00:07:04 +00:00
|
|
|
// defaultBufferSize is the queue size handed to the event loop and the
// baseline buffer capacity of the channels handed out to event subscribers.
const defaultBufferSize = 2048
|
2015-12-14 22:43:00 +00:00
|
|
|
|
2015-12-18 00:07:04 +00:00
|
|
|
// New returns an initialized Process supervisor.
|
2016-02-12 01:26:24 +00:00
|
|
|
func New(stateDir string, oom bool) (*Supervisor, error) {
|
|
|
|
tasks := make(chan *startTask, 10)
|
2015-11-05 23:29:53 +00:00
|
|
|
if err := os.MkdirAll(stateDir, 0755); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-01-06 21:32:46 +00:00
|
|
|
machine, err := CollectMachineInformation()
|
2015-11-05 23:49:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-11-05 23:29:53 +00:00
|
|
|
}
|
2016-01-06 21:32:46 +00:00
|
|
|
monitor, err := NewMonitor()
|
2015-12-03 19:49:56 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-11-05 23:49:13 +00:00
|
|
|
s := &Supervisor{
|
2016-02-11 22:36:32 +00:00
|
|
|
stateDir: stateDir,
|
|
|
|
containers: make(map[string]*containerInfo),
|
|
|
|
tasks: tasks,
|
|
|
|
machine: machine,
|
2016-02-12 01:26:24 +00:00
|
|
|
subscribers: make(map[chan Event]struct{}),
|
2016-02-11 22:36:32 +00:00
|
|
|
el: eventloop.NewChanLoop(defaultBufferSize),
|
|
|
|
monitor: monitor,
|
2015-11-07 00:44:52 +00:00
|
|
|
}
|
2016-02-12 18:17:59 +00:00
|
|
|
if err := setupEventLog(s); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-12-16 00:22:53 +00:00
|
|
|
if oom {
|
2015-12-18 20:17:53 +00:00
|
|
|
s.notifier = chanotify.New()
|
|
|
|
go func() {
|
|
|
|
for id := range s.notifier.Chan() {
|
2016-02-12 01:26:24 +00:00
|
|
|
e := NewTask(OOMTaskType)
|
2016-01-22 16:15:13 +00:00
|
|
|
e.ID = id.(string)
|
2016-02-12 01:26:24 +00:00
|
|
|
s.SendTask(e)
|
2015-12-18 20:17:53 +00:00
|
|
|
}
|
|
|
|
}()
|
2015-12-16 00:22:53 +00:00
|
|
|
}
|
2015-12-01 18:55:13 +00:00
|
|
|
// register default event handlers
|
2016-02-12 01:26:24 +00:00
|
|
|
s.handlers = map[TaskType]Handler{
|
|
|
|
ExecExitTaskType: &ExecExitTask{s},
|
|
|
|
ExitTaskType: &ExitTask{s},
|
|
|
|
StartContainerTaskType: &StartTask{s},
|
|
|
|
DeleteTaskType: &DeleteTask{s},
|
|
|
|
GetContainerTaskType: &GetContainersTask{s},
|
|
|
|
SignalTaskType: &SignalTask{s},
|
|
|
|
AddProcessTaskType: &AddProcessTask{s},
|
|
|
|
UpdateContainerTaskType: &UpdateTask{s},
|
|
|
|
CreateCheckpointTaskType: &CreateCheckpointTask{s},
|
|
|
|
DeleteCheckpointTaskType: &DeleteCheckpointTask{s},
|
|
|
|
StatsTaskType: &StatsTask{s},
|
|
|
|
UpdateProcessTaskType: &UpdateProcessTask{s},
|
2015-12-01 18:55:13 +00:00
|
|
|
}
|
2016-01-06 21:32:46 +00:00
|
|
|
go s.exitHandler()
|
|
|
|
if err := s.restore(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-11-05 23:29:53 +00:00
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
|
2015-12-11 19:27:33 +00:00
|
|
|
type containerInfo struct {
|
|
|
|
container runtime.Container
|
|
|
|
}
|
|
|
|
|
2016-02-12 18:17:59 +00:00
|
|
|
func setupEventLog(s *Supervisor) error {
|
|
|
|
if err := readEventLog(s); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
logrus.WithField("count", len(s.eventLog)).Debug("containerd: read past events")
|
|
|
|
events := s.Events(time.Time{})
|
|
|
|
f, err := os.OpenFile(filepath.Join(s.stateDir, "events.log"), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0755)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
enc := json.NewEncoder(f)
|
|
|
|
go func() {
|
|
|
|
for e := range events {
|
|
|
|
s.eventLog = append(s.eventLog, e)
|
|
|
|
if err := enc.Encode(e); err != nil {
|
|
|
|
logrus.WithField("error", err).Error("containerd: write event to journal")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func readEventLog(s *Supervisor) error {
|
|
|
|
f, err := os.Open(filepath.Join(s.stateDir, "events.log"))
|
|
|
|
if err != nil {
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
dec := json.NewDecoder(f)
|
|
|
|
for {
|
|
|
|
var e Event
|
|
|
|
if err := dec.Decode(&e); err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
s.eventLog = append(s.eventLog, e)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-11-05 23:29:53 +00:00
|
|
|
type Supervisor struct {
|
|
|
|
// stateDir is the directory on the system to store container runtime state information.
|
2015-12-10 22:11:00 +00:00
|
|
|
stateDir string
|
2015-12-11 19:27:33 +00:00
|
|
|
containers map[string]*containerInfo
|
2016-02-12 01:26:24 +00:00
|
|
|
handlers map[TaskType]Handler
|
|
|
|
events chan *Task
|
|
|
|
tasks chan *startTask
|
2015-12-10 22:11:00 +00:00
|
|
|
// we need a lock around the subscribers map only because additions and deletions from
|
|
|
|
// the map are via the API so we cannot really control the concurrency
|
2015-12-10 21:56:49 +00:00
|
|
|
subscriberLock sync.RWMutex
|
2016-02-12 01:26:24 +00:00
|
|
|
subscribers map[chan Event]struct{}
|
2015-12-04 21:31:17 +00:00
|
|
|
machine Machine
|
2015-12-18 20:17:53 +00:00
|
|
|
notifier *chanotify.Notifier
|
2015-12-16 21:53:11 +00:00
|
|
|
el eventloop.EventLoop
|
2016-01-06 21:32:46 +00:00
|
|
|
monitor *Monitor
|
2016-02-12 18:17:59 +00:00
|
|
|
eventLog []Event
|
2015-11-05 23:29:53 +00:00
|
|
|
}
|
|
|
|
|
2015-12-10 22:11:00 +00:00
|
|
|
// Stop closes all tasks and sends a SIGTERM to each container's pid1 then waits for they to
|
|
|
|
// terminate. After it has handled all the SIGCHILD events it will close the signals chan
|
|
|
|
// and exit. Stop is a non-blocking call and will return after the containers have been signaled
|
2016-01-06 21:32:46 +00:00
|
|
|
func (s *Supervisor) Stop() {
|
2015-12-04 21:31:17 +00:00
|
|
|
// Close the tasks channel so that no new containers get started
|
|
|
|
close(s.tasks)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close closes any open files in the supervisor but expects that Stop has been
|
|
|
|
// callsed so that no more containers are started.
|
2015-11-10 22:24:34 +00:00
|
|
|
func (s *Supervisor) Close() error {
|
2015-12-07 22:24:40 +00:00
|
|
|
return nil
|
2015-11-10 22:24:34 +00:00
|
|
|
}
|
|
|
|
|
2016-02-12 01:26:24 +00:00
|
|
|
// Event is a single entry of the supervisor's event stream. It is the value
// delivered to subscribers and the record stored, JSON-encoded, in the event
// journal.
type Event struct {
	// ID identifies the subject of the event.
	// NOTE(review): presumably the container id - confirm against the code
	// that creates events.
	ID string `json:"id"`
	// Type is the name of the kind of event.
	Type string `json:"type"`
	// Timestamp is compared against the replay cut-off when past events are
	// replayed to a new subscriber.
	Timestamp time.Time `json:"timestamp"`
	// Pid is left out of the JSON form when empty.
	Pid string `json:"pid,omitempty"`
	// Status is left out of the JSON form when zero.
	Status int `json:"status,omitempty"`
}
|
|
|
|
|
2015-12-10 22:11:00 +00:00
|
|
|
// Events returns an event channel that external consumers can use to receive updates
|
|
|
|
// on container events
|
2016-02-12 18:17:59 +00:00
|
|
|
func (s *Supervisor) Events(from time.Time) chan Event {
|
2015-12-10 21:56:49 +00:00
|
|
|
s.subscriberLock.Lock()
|
|
|
|
defer s.subscriberLock.Unlock()
|
2016-02-12 01:26:24 +00:00
|
|
|
c := make(chan Event, defaultBufferSize)
|
2015-12-10 20:30:04 +00:00
|
|
|
EventSubscriberCounter.Inc(1)
|
|
|
|
s.subscribers[c] = struct{}{}
|
2016-02-12 18:17:59 +00:00
|
|
|
if !from.IsZero() {
|
|
|
|
// replay old event
|
|
|
|
for _, e := range s.eventLog {
|
|
|
|
if e.Timestamp.After(from) {
|
|
|
|
c <- e
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-12-10 20:30:04 +00:00
|
|
|
return c
|
2015-12-01 23:49:24 +00:00
|
|
|
}
|
|
|
|
|
2015-12-10 22:11:00 +00:00
|
|
|
// Unsubscribe removes the provided channel from receiving any more events
|
2016-02-12 01:26:24 +00:00
|
|
|
func (s *Supervisor) Unsubscribe(sub chan Event) {
|
2015-12-10 21:56:49 +00:00
|
|
|
s.subscriberLock.Lock()
|
|
|
|
defer s.subscriberLock.Unlock()
|
2015-12-01 23:49:24 +00:00
|
|
|
delete(s.subscribers, sub)
|
2015-12-10 21:56:49 +00:00
|
|
|
close(sub)
|
2015-12-10 20:30:04 +00:00
|
|
|
EventSubscriberCounter.Dec(1)
|
2015-12-01 23:49:24 +00:00
|
|
|
}
|
|
|
|
|
2015-12-10 22:11:00 +00:00
|
|
|
// notifySubscribers will send the provided event to the external subscribers
|
|
|
|
// of the events channel
|
2016-02-12 01:26:24 +00:00
|
|
|
func (s *Supervisor) notifySubscribers(e Event) {
|
2015-12-10 21:56:49 +00:00
|
|
|
s.subscriberLock.RLock()
|
|
|
|
defer s.subscriberLock.RUnlock()
|
2015-12-01 23:49:24 +00:00
|
|
|
for sub := range s.subscribers {
|
2015-12-10 21:56:49 +00:00
|
|
|
// do a non-blocking send for the channel
|
|
|
|
select {
|
|
|
|
case sub <- e:
|
|
|
|
default:
|
2016-02-12 18:17:59 +00:00
|
|
|
logrus.WithField("event", e.Type).Warn("containerd: event not sent to subscriber")
|
2015-12-10 21:56:49 +00:00
|
|
|
}
|
2015-12-01 23:49:24 +00:00
|
|
|
}
|
2015-11-30 23:46:36 +00:00
|
|
|
}
|
|
|
|
|
2015-11-06 00:40:57 +00:00
|
|
|
// Start is a non-blocking call that runs the supervisor for monitoring contianer processes and
|
2015-11-05 23:29:53 +00:00
|
|
|
// executing new containers.
|
|
|
|
//
|
2015-12-10 22:11:00 +00:00
|
|
|
// This event loop is the only thing that is allowed to modify state of containers and processes
|
|
|
|
// therefore it is save to do operations in the handlers that modify state of the system or
|
|
|
|
// state of the Supervisor
|
2015-12-03 01:44:39 +00:00
|
|
|
func (s *Supervisor) Start() error {
|
2016-02-12 18:17:59 +00:00
|
|
|
logrus.WithField("stateDir", s.stateDir).Debug("containerd: supervisor running")
|
2015-12-16 21:53:11 +00:00
|
|
|
return s.el.Start()
|
2015-11-05 23:29:53 +00:00
|
|
|
}
|
|
|
|
|
2015-12-03 19:49:56 +00:00
|
|
|
// Machine returns the machine information for which the
|
|
|
|
// supervisor is executing on.
|
|
|
|
func (s *Supervisor) Machine() Machine {
|
|
|
|
return s.machine
|
|
|
|
}
|
|
|
|
|
2016-02-12 01:26:24 +00:00
|
|
|
// SendTask sends the provided event the the supervisors main event loop
|
|
|
|
func (s *Supervisor) SendTask(evt *Task) {
|
|
|
|
TasksCounter.Inc(1)
|
|
|
|
s.el.Send(&commonTask{data: evt, sv: s})
|
2015-11-05 23:29:53 +00:00
|
|
|
}
|
2015-12-11 01:07:21 +00:00
|
|
|
|
2016-01-06 21:32:46 +00:00
|
|
|
func (s *Supervisor) exitHandler() {
|
|
|
|
for p := range s.monitor.Exits() {
|
2016-02-12 01:26:24 +00:00
|
|
|
e := NewTask(ExitTaskType)
|
2016-01-06 21:32:46 +00:00
|
|
|
e.Process = p
|
2016-02-12 01:26:24 +00:00
|
|
|
s.SendTask(e)
|
2015-12-11 01:07:21 +00:00
|
|
|
}
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Supervisor) monitorProcess(p runtime.Process) error {
|
|
|
|
return s.monitor.Monitor(p)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Supervisor) restore() error {
|
|
|
|
dirs, err := ioutil.ReadDir(s.stateDir)
|
2015-12-11 19:27:33 +00:00
|
|
|
if err != nil {
|
2016-01-06 21:32:46 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, d := range dirs {
|
|
|
|
if !d.IsDir() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
id := d.Name()
|
|
|
|
container, err := runtime.Load(s.stateDir, id)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
processes, err := container.Processes()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
ContainersCounter.Inc(1)
|
|
|
|
s.containers[id] = &containerInfo{
|
|
|
|
container: container,
|
|
|
|
}
|
|
|
|
logrus.WithField("id", id).Debug("containerd: container restored")
|
2016-02-01 19:02:41 +00:00
|
|
|
var exitedProcesses []runtime.Process
|
2016-01-06 21:32:46 +00:00
|
|
|
for _, p := range processes {
|
2016-02-01 19:02:41 +00:00
|
|
|
if _, err := p.ExitStatus(); err == nil {
|
|
|
|
exitedProcesses = append(exitedProcesses, p)
|
|
|
|
} else {
|
|
|
|
if err := s.monitorProcess(p); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(exitedProcesses) > 0 {
|
|
|
|
// sort processes so that init is fired last because that is how the kernel sends the
|
|
|
|
// exit events
|
|
|
|
sort.Sort(&processSorter{exitedProcesses})
|
|
|
|
for _, p := range exitedProcesses {
|
2016-02-12 01:26:24 +00:00
|
|
|
e := NewTask(ExitTaskType)
|
2016-02-01 19:02:41 +00:00
|
|
|
e.Process = p
|
2016-02-12 01:26:24 +00:00
|
|
|
s.SendTask(e)
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
|
|
|
}
|
2015-12-11 01:07:21 +00:00
|
|
|
}
|
2016-01-06 21:32:46 +00:00
|
|
|
return nil
|
2015-12-11 01:07:21 +00:00
|
|
|
}
|
2016-02-01 19:02:41 +00:00
|
|
|
|
|
|
|
type processSorter struct {
|
|
|
|
processes []runtime.Process
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *processSorter) Len() int {
|
|
|
|
return len(s.processes)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *processSorter) Swap(i, j int) {
|
|
|
|
s.processes[i], s.processes[j] = s.processes[j], s.processes[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *processSorter) Less(i, j int) bool {
|
|
|
|
return s.processes[j].ID() == "init"
|
|
|
|
}
|