containerd/supervisor/stats_collector.go

241 lines
6.6 KiB
Go
Raw Normal View History

package supervisor
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/containerd/api/grpc/types"
"github.com/docker/containerd/runtime"
"github.com/docker/docker/pkg/pubsub"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/system"
)
func convertBlkioEntryToPb(b []cgroups.BlkioStatEntry) []*types.BlkioStatsEntry {
var pbEs []*types.BlkioStatsEntry
for _, e := range b {
pbEs = append(pbEs, &types.BlkioStatsEntry{
Major: e.Major,
Minor: e.Minor,
Op: e.Op,
Value: e.Value,
})
}
return pbEs
}
func convertToPb(st *runtime.Stat) *types.Stats {
pbSt := &types.Stats{
Timestamp: uint64(st.Timestamp.Unix()),
CgroupStats: &types.CgroupStats{},
}
lcSt, ok := st.Data.(*libcontainer.Stats)
if !ok {
return pbSt
}
cpuSt := lcSt.CgroupStats.CpuStats
pbSt.CgroupStats.CpuStats = &types.CpuStats{
CpuUsage: &types.CpuUsage{
TotalUsage: cpuSt.CpuUsage.TotalUsage,
PercpuUsage: cpuSt.CpuUsage.PercpuUsage,
UsageInKernelmode: cpuSt.CpuUsage.UsageInKernelmode,
UsageInUsermode: cpuSt.CpuUsage.UsageInUsermode,
},
ThrottlingData: &types.ThrottlingData{
Periods: cpuSt.ThrottlingData.Periods,
ThrottledPeriods: cpuSt.ThrottlingData.ThrottledPeriods,
ThrottledTime: cpuSt.ThrottlingData.ThrottledTime,
},
}
memSt := lcSt.CgroupStats.MemoryStats
pbSt.CgroupStats.MemoryStats = &types.MemoryStats{
Cache: memSt.Cache,
Usage: &types.MemoryData{
Usage: memSt.Usage.Usage,
MaxUsage: memSt.Usage.MaxUsage,
Failcnt: memSt.Usage.Failcnt,
},
SwapUsage: &types.MemoryData{
Usage: memSt.SwapUsage.Usage,
MaxUsage: memSt.SwapUsage.MaxUsage,
Failcnt: memSt.SwapUsage.Failcnt,
},
}
blkSt := lcSt.CgroupStats.BlkioStats
pbSt.CgroupStats.BlkioStats = &types.BlkioStats{
IoServiceBytesRecursive: convertBlkioEntryToPb(blkSt.IoServiceBytesRecursive),
IoServicedRecursive: convertBlkioEntryToPb(blkSt.IoServicedRecursive),
IoQueuedRecursive: convertBlkioEntryToPb(blkSt.IoQueuedRecursive),
IoServiceTimeRecursive: convertBlkioEntryToPb(blkSt.IoServiceTimeRecursive),
IoWaitTimeRecursive: convertBlkioEntryToPb(blkSt.IoWaitTimeRecursive),
IoMergedRecursive: convertBlkioEntryToPb(blkSt.IoMergedRecursive),
IoTimeRecursive: convertBlkioEntryToPb(blkSt.IoTimeRecursive),
SectorsRecursive: convertBlkioEntryToPb(blkSt.SectorsRecursive),
}
pbSt.CgroupStats.HugetlbStats = make(map[string]*types.HugetlbStats)
for k, st := range lcSt.CgroupStats.HugetlbStats {
pbSt.CgroupStats.HugetlbStats[k] = &types.HugetlbStats{
Usage: st.Usage,
MaxUsage: st.MaxUsage,
Failcnt: st.Failcnt,
}
}
return pbSt
}
type statsPair struct {
ct runtime.Container
pub *pubsub.Publisher
}
func newStatsCollector(interval time.Duration) *statsCollector {
s := &statsCollector{
interval: interval,
clockTicksPerSecond: uint64(system.GetClockTicks()),
bufReader: bufio.NewReaderSize(nil, 128),
publishers: make(map[string]*statsPair),
}
go s.run()
return s
}
// statsCollector manages and provides container resource stats
type statsCollector struct {
m sync.Mutex
supervisor *Supervisor
interval time.Duration
clockTicksPerSecond uint64
publishers map[string]*statsPair
bufReader *bufio.Reader
}
// collect registers the container with the collector and adds it to
// the event loop for collection on the specified interval returning
// a channel for the subscriber to receive on.
func (s *statsCollector) collect(c runtime.Container) chan interface{} {
s.m.Lock()
defer s.m.Unlock()
publisher, exists := s.publishers[c.ID()]
if !exists {
pub := pubsub.NewPublisher(100*time.Millisecond, 1024)
publisher = &statsPair{ct: c, pub: pub}
s.publishers[c.ID()] = publisher
}
return publisher.pub.Subscribe()
}
// stopCollection closes the channels for all subscribers and removes
// the container from metrics collection.
func (s *statsCollector) stopCollection(c runtime.Container) {
s.m.Lock()
if publisher, exists := s.publishers[c.ID()]; exists {
publisher.pub.Close()
delete(s.publishers, c.ID())
}
s.m.Unlock()
}
// unsubscribe removes a specific subscriber from receiving updates for a container's stats.
func (s *statsCollector) unsubscribe(c runtime.Container, ch chan interface{}) {
s.m.Lock()
publisher := s.publishers[c.ID()]
if publisher != nil {
publisher.pub.Evict(ch)
if publisher.pub.Len() == 0 {
delete(s.publishers, c.ID())
}
}
s.m.Unlock()
}
func (s *statsCollector) run() {
type publishersPair struct {
container runtime.Container
publisher *pubsub.Publisher
}
// we cannot determine the capacity here.
// it will grow enough in first iteration
var pairs []*statsPair
for range time.Tick(s.interval) {
// it does not make sense in the first iteration,
// but saves allocations in further iterations
pairs = pairs[:0]
s.m.Lock()
for _, publisher := range s.publishers {
// copy pointers here to release the lock ASAP
pairs = append(pairs, publisher)
}
s.m.Unlock()
if len(pairs) == 0 {
continue
}
for _, pair := range pairs {
stats, err := pair.ct.Stats()
if err != nil {
logrus.Errorf("Error getting stats for container ID %s", pair.ct.ID())
continue
}
pair.pub.Publish(convertToPb(stats))
}
}
}
const nanoSecondsPerSecond = 1e9
// getSystemCPUUsage returns the host system's cpu usage in
// nanoseconds. An error is returned if the format of the underlying
// file does not match.
//
// Uses /proc/stat defined by POSIX. Looks for the cpu
// statistics line and then sums up the first seven fields
// provided. See `man 5 proc` for details on specific field
// information.
func (s *statsCollector) getSystemCPUUsage() (uint64, error) {
var line string
f, err := os.Open("/proc/stat")
if err != nil {
return 0, err
}
defer func() {
s.bufReader.Reset(nil)
f.Close()
}()
s.bufReader.Reset(f)
err = nil
for err == nil {
line, err = s.bufReader.ReadString('\n')
if err != nil {
break
}
parts := strings.Fields(line)
switch parts[0] {
case "cpu":
if len(parts) < 8 {
return 0, fmt.Errorf("bad format of cpu stats")
}
var totalClockTicks uint64
for _, i := range parts[1:8] {
v, err := strconv.ParseUint(i, 10, 64)
if err != nil {
return 0, fmt.Errorf("error parsing cpu stats")
}
totalClockTicks += v
}
return (totalClockTicks * nanoSecondsPerSecond) /
s.clockTicksPerSecond, nil
}
}
return 0, fmt.Errorf("bad stats format")
}