2016-01-06 21:32:46 +00:00
|
|
|
package supervisor
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
"syscall"
|
|
|
|
|
|
|
|
"github.com/Sirupsen/logrus"
|
2016-04-06 03:17:30 +00:00
|
|
|
"github.com/docker/containerd/archutils"
|
2016-01-06 21:32:46 +00:00
|
|
|
"github.com/docker/containerd/runtime"
|
|
|
|
)
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// NewMonitor starts a new process monitor and returns it
|
2016-01-06 21:32:46 +00:00
|
|
|
func NewMonitor() (*Monitor, error) {
|
|
|
|
m := &Monitor{
|
2016-02-29 22:15:16 +00:00
|
|
|
receivers: make(map[int]interface{}),
|
2016-01-06 21:32:46 +00:00
|
|
|
exits: make(chan runtime.Process, 1024),
|
2016-02-29 22:15:16 +00:00
|
|
|
ooms: make(chan string, 1024),
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
2016-04-06 03:17:30 +00:00
|
|
|
fd, err := archutils.EpollCreate1(0)
|
2016-01-06 21:32:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
m.epollFd = fd
|
|
|
|
go m.start()
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// Monitor represents a runtime.Process monitor
|
2016-01-06 21:32:46 +00:00
|
|
|
type Monitor struct {
|
|
|
|
m sync.Mutex
|
2016-02-29 22:15:16 +00:00
|
|
|
receivers map[int]interface{}
|
2016-01-06 21:32:46 +00:00
|
|
|
exits chan runtime.Process
|
2016-02-29 22:15:16 +00:00
|
|
|
ooms chan string
|
2016-01-06 21:32:46 +00:00
|
|
|
epollFd int
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// Exits returns the channel used to notify of a process exit
|
2016-01-06 21:32:46 +00:00
|
|
|
func (m *Monitor) Exits() chan runtime.Process {
|
|
|
|
return m.exits
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// OOMs returns the channel used to notify of a container exit due to OOM
|
2016-02-29 22:15:16 +00:00
|
|
|
func (m *Monitor) OOMs() chan string {
|
|
|
|
return m.ooms
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// Monitor adds a process to the list of the one being monitored
|
2016-01-06 21:32:46 +00:00
|
|
|
func (m *Monitor) Monitor(p runtime.Process) error {
|
|
|
|
m.m.Lock()
|
|
|
|
defer m.m.Unlock()
|
|
|
|
fd := p.ExitFD()
|
|
|
|
event := syscall.EpollEvent{
|
|
|
|
Fd: int32(fd),
|
|
|
|
Events: syscall.EPOLLHUP,
|
|
|
|
}
|
2016-04-06 03:17:30 +00:00
|
|
|
if err := archutils.EpollCtl(m.epollFd, syscall.EPOLL_CTL_ADD, fd, &event); err != nil {
|
2016-01-06 21:32:46 +00:00
|
|
|
return err
|
|
|
|
}
|
2016-02-01 19:02:41 +00:00
|
|
|
EpollFdCounter.Inc(1)
|
2016-02-29 22:15:16 +00:00
|
|
|
m.receivers[fd] = p
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// MonitorOOM adds a container to the list of the ones monitored for OOM
|
2016-02-29 22:15:16 +00:00
|
|
|
func (m *Monitor) MonitorOOM(c runtime.Container) error {
|
|
|
|
m.m.Lock()
|
|
|
|
defer m.m.Unlock()
|
|
|
|
o, err := c.OOM()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
fd := o.FD()
|
|
|
|
event := syscall.EpollEvent{
|
|
|
|
Fd: int32(fd),
|
|
|
|
Events: syscall.EPOLLHUP | syscall.EPOLLIN,
|
|
|
|
}
|
2016-04-06 03:17:30 +00:00
|
|
|
if err := archutils.EpollCtl(m.epollFd, syscall.EPOLL_CTL_ADD, fd, &event); err != nil {
|
2016-02-29 22:15:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
EpollFdCounter.Inc(1)
|
|
|
|
m.receivers[fd] = o
|
2016-01-06 21:32:46 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-06-03 22:00:49 +00:00
|
|
|
// Close cleans up resources allocated by NewMonitor()
|
2016-01-06 21:32:46 +00:00
|
|
|
func (m *Monitor) Close() error {
|
|
|
|
return syscall.Close(m.epollFd)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *Monitor) start() {
|
|
|
|
var events [128]syscall.EpollEvent
|
|
|
|
for {
|
2016-04-06 03:17:30 +00:00
|
|
|
n, err := archutils.EpollWait(m.epollFd, events[:], -1)
|
2016-01-06 21:32:46 +00:00
|
|
|
if err != nil {
|
|
|
|
if err == syscall.EINTR {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
logrus.WithField("error", err).Fatal("containerd: epoll wait")
|
|
|
|
}
|
|
|
|
// process events
|
|
|
|
for i := 0; i < n; i++ {
|
2016-02-29 22:15:16 +00:00
|
|
|
fd := int(events[i].Fd)
|
|
|
|
m.m.Lock()
|
|
|
|
r := m.receivers[fd]
|
|
|
|
switch t := r.(type) {
|
|
|
|
case runtime.Process:
|
|
|
|
if events[i].Events == syscall.EPOLLHUP {
|
|
|
|
delete(m.receivers, fd)
|
|
|
|
if err = syscall.EpollCtl(m.epollFd, syscall.EPOLL_CTL_DEL, fd, &syscall.EpollEvent{
|
|
|
|
Events: syscall.EPOLLHUP,
|
|
|
|
Fd: int32(fd),
|
|
|
|
}); err != nil {
|
|
|
|
logrus.WithField("error", err).Error("containerd: epoll remove fd")
|
|
|
|
}
|
|
|
|
if err := t.Close(); err != nil {
|
|
|
|
logrus.WithField("error", err).Error("containerd: close process IO")
|
|
|
|
}
|
|
|
|
EpollFdCounter.Dec(1)
|
|
|
|
m.exits <- t
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
2016-02-29 22:15:16 +00:00
|
|
|
case runtime.OOM:
|
|
|
|
// always flush the event fd
|
|
|
|
t.Flush()
|
|
|
|
if t.Removed() {
|
|
|
|
delete(m.receivers, fd)
|
|
|
|
// epoll will remove the fd from its set after it has been closed
|
|
|
|
t.Close()
|
|
|
|
EpollFdCounter.Dec(1)
|
|
|
|
} else {
|
|
|
|
m.ooms <- t.ContainerID()
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
|
|
|
}
|
2016-02-29 22:15:16 +00:00
|
|
|
m.m.Unlock()
|
2016-01-06 21:32:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|