libcontainer: Initial version of cgroups support

This is a minimal version of raw cgroup support for libcontainer.
It has only enough for what docker needs, and it has no support
for systemd yet.
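
For context on the "raw" (non-systemd) model used below: placing a process in a cgroup is plain filesystem work. You create a directory under /sys/fs/cgroup/<subsystem>/, write the PID into its tasks file, and write limit values into the subsystem's control files. A minimal standalone sketch of that mechanism, assuming a cgroup v1 hierarchy mounted at /sys/fs/cgroup and root privileges; the "demo" group and the 64 MiB limit are illustrative, not part of this commit:

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
)

func main() {
	// Requires root and a cgroup v1 "memory" hierarchy mounted at /sys/fs/cgroup.
	dir := filepath.Join("/sys/fs/cgroup/memory", "demo")
	if err := os.MkdirAll(dir, 0755); err != nil {
		panic(err)
	}

	// Joining a cgroup is writing a PID into its tasks file.
	pid := strconv.Itoa(os.Getpid())
	if err := ioutil.WriteFile(filepath.Join(dir, "tasks"), []byte(pid), 0700); err != nil {
		panic(err)
	}

	// Setting a limit is writing a value into a control file (64 MiB here).
	if err := ioutil.WriteFile(filepath.Join(dir, "memory.limit_in_bytes"), []byte("67108864"), 0700); err != nil {
		panic(err)
	}

	fmt.Println("joined", dir)
}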

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
Alexander Larsson 2014-02-20 23:12:08 +01:00 committed by Michael Crosby
parent 8590435fa0
commit 3de41b34a2
6 changed files with 218 additions and 10 deletions


@@ -40,6 +40,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
 	return parseCgroupFile(subsystem, f)
 }
 
+func GetInitCgroupDir(subsystem string) (string, error) {
+	f, err := os.Open("/proc/1/cgroup")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	return parseCgroupFile(subsystem, f)
+}
+
 func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
 	s := bufio.NewScanner(r)
@@ -49,8 +59,10 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
 		}
 		text := s.Text()
 		parts := strings.Split(text, ":")
-		if parts[1] == subsystem {
-			return parts[2], nil
+		for _, subs := range strings.Split(parts[1], ",") {
+			if subs == subsystem {
+				return parts[2], nil
+			}
 		}
 	}
 	return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)


@@ -0,0 +1,177 @@
package cgroup

import (
	"fmt"
	"github.com/dotcloud/docker/pkg/cgroups"
	"github.com/dotcloud/docker/pkg/libcontainer"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
)

// We have two implementations of cgroups support, one is based on
// systemd and the dbus api, and one is based on raw cgroup fs operations
// following the pre-single-writer model docs at:
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/

const (
	cgroupRoot = "/sys/fs/cgroup"
)

func useSystemd() bool {
	return false
}

func applyCgroupSystemd(container *libcontainer.Container, pid int) error {
	return fmt.Errorf("not supported yet")
}

func writeFile(dir, file, data string) error {
	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}

func getCgroup(subsystem string, container *libcontainer.Container) (string, error) {
	cgroup := container.CgroupName
	if container.CgroupParent != "" {
		cgroup = filepath.Join(container.CgroupParent, cgroup)
	}

	initPath, err := cgroups.GetInitCgroupDir(subsystem)
	if err != nil {
		return "", err
	}

	path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup)
	return path, nil
}

func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) {
	path, err := getCgroup(subsystem, container)
	if err != nil {
		return "", err
	}

	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
		return "", err
	}

	if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
		return "", err
	}

	return path, nil
}

func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) {
	if _, err := os.Stat(cgroupRoot); err != nil {
		return fmt.Errorf("cgroups fs not found")
	}

	if !container.DeviceAccess {
		dir, err := joinCgroup("devices", container, pid)
		if err != nil {
			return err
		}
		defer func() {
			if retErr != nil {
				os.RemoveAll(dir)
			}
		}()

		if err := writeFile(dir, "devices.deny", "a"); err != nil {
			return err
		}

		// Whitelist entries are "<type> <major>:<minor> <perms>",
		// where perms is a subset of rwm (read, write, mknod).
		allow := []string{
			// /dev/null, zero, full
			"c 1:3 rwm",
			"c 1:5 rwm",
			"c 1:7 rwm",

			// consoles
			"c 5:1 rwm",
			"c 5:0 rwm",
			"c 4:0 rwm",
			"c 4:1 rwm",

			// /dev/urandom,/dev/random
			"c 1:9 rwm",
			"c 1:8 rwm",

			// /dev/pts/ - pts namespaces are "coming soon"
			"c 136:* rwm",
			"c 5:2 rwm",

			// tuntap
			"c 10:200 rwm",
		}

		for _, val := range allow {
			if err := writeFile(dir, "devices.allow", val); err != nil {
				return err
			}
		}
	}

	if container.Memory != 0 || container.MemorySwap != 0 {
		dir, err := joinCgroup("memory", container, pid)
		if err != nil {
			return err
		}
		defer func() {
			if retErr != nil {
				os.RemoveAll(dir)
			}
		}()

		if container.Memory != 0 {
			if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
				return err
			}
			if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
				return err
			}
		}
		if container.MemorySwap != 0 {
			if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil {
				return err
			}
		}
	}

	// We always want to join the cpu group, to allow fair cpu scheduling
	// on a container basis
	dir, err := joinCgroup("cpu", container, pid)
	if err != nil {
		return err
	}
	if container.CpuShares != 0 {
		if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil {
			return err
		}
	}

	return nil
}

func CleanupCgroup(container *libcontainer.Container) error {
	path, _ := getCgroup("memory", container)
	os.RemoveAll(path)

	path, _ = getCgroup("devices", container)
	os.RemoveAll(path)

	path, _ = getCgroup("cpu", container)
	os.RemoveAll(path)

	return nil
}

func ApplyCgroup(container *libcontainer.Container, pid int) error {
	if container.CgroupName == "" {
		return nil
	}

	if useSystemd() {
		return applyCgroupSystemd(container, pid)
	} else {
		return applyCgroupRaw(container, pid)
	}
}
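
To make the path composition and call order concrete, here is a rough usage sketch; the exec.Command, the docker-koye/docker names and the logging are illustrative, not part of this commit. Assuming PID 1 sits at the root of each hierarchy, getCgroup("memory", container) resolves to /sys/fs/cgroup/memory/docker/docker-koye, and ApplyCgroup joins the devices, memory and cpu groups before writing the limits.

package main

import (
	"log"
	"os/exec"

	"github.com/dotcloud/docker/pkg/libcontainer"
	"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
)

func main() {
	container := &libcontainer.Container{
		CgroupName:   "docker-koye",
		CgroupParent: "docker",
		Memory:       524800, // bytes, as in the sample config further down
	}

	cmd := exec.Command("sleep", "60")
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}

	// Joins /sys/fs/cgroup/{devices,memory,cpu}/.../docker/docker-koye and
	// applies the memory limit before the child does any real work.
	if err := cgroup.ApplyCgroup(container, cmd.Process.Pid); err != nil {
		cmd.Process.Kill()
		log.Fatal(err)
	}
	defer cgroup.CleanupCgroup(container)

	cmd.Wait()
}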


@@ -11,6 +11,13 @@ type Container struct {
 	Namespaces   Namespaces   `json:"namespaces,omitempty"`   // namespaces to apply
 	Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop
 	Network      *Network     `json:"network,omitempty"`      // nil for host's network stack
+	CgroupName   string       `json:"cgroup_name,omitempty"`   // name of cgroup
+	CgroupParent string       `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice
+	DeviceAccess bool         `json:"device_access,omitempty"` // allow full access to host devices (skip the devices whitelist)
+	Memory       int64        `json:"memory,omitempty"`        // Memory limit (in bytes)
+	MemorySwap   int64        `json:"memory_swap,omitempty"`   // Total memory usage (memory + swap); set `-1' to disable swap
+	CpuShares    int64        `json:"cpu_shares,omitempty"`    // CPU shares (relative weight vs. other containers)
 }
 
 // Network defines configuration for a container's networking stack


@@ -34,5 +34,8 @@
         "gateway": "172.17.42.1",
         "bridge": "docker0",
         "mtu": 1500
-    }
+    },
+    "cgroup_name": "docker-koye",
+    "cgroup_parent": "docker",
+    "memory": 524800
 }
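
Since the Container fields above carry json tags, this sample decodes straight into libcontainer.Container. A quick check, assuming the sample is saved as container.json (the file name here is just for the example):

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"

	"github.com/dotcloud/docker/pkg/libcontainer"
)

func main() {
	data, err := ioutil.ReadFile("container.json")
	if err != nil {
		panic(err)
	}

	var container libcontainer.Container
	if err := json.Unmarshal(data, &container); err != nil {
		panic(err)
	}

	// Prints: docker-koye docker 524800
	fmt.Println(container.CgroupName, container.CgroupParent, container.Memory)
}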


@@ -5,6 +5,7 @@ package main
 import (
 	"fmt"
 	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
 	"github.com/dotcloud/docker/pkg/libcontainer/network"
 	"github.com/dotcloud/docker/pkg/libcontainer/utils"
 	"github.com/dotcloud/docker/pkg/system"
@@ -33,10 +34,18 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 		return -1, err
 	}
 	if err := writePidFile(command); err != nil {
 		command.Process.Kill()
 		return -1, err
 	}
 	defer deletePidFile()
 
+	// Do this before syncing with child so that no children
+	// can escape the cgroup
+	if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil {
+		command.Process.Kill()
+		return -1, err
+	}
+
 	if container.Network != nil {
 		vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid)
 		if err != nil {
@@ -45,6 +54,9 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 		sendVethName(vethPair, inPipe)
 	}
 
+	// Sync with child
+	inPipe.Close()
+
 	go io.Copy(os.Stdout, master)
 	go io.Copy(master, os.Stdin)
@@ -67,7 +79,6 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 // pipe so that the child stops waiting for more data
 func sendVethName(name string, pipe io.WriteCloser) {
 	fmt.Fprint(pipe, name)
-	pipe.Close()
 }
 
 // initializeContainerVeth will create a veth pair and setup the host's


@@ -20,12 +20,10 @@ func initCommand(container *libcontainer.Container, console string, args []strin
 		return err
 	}
 
-	var tempVethName string
-	if container.Network != nil {
-		tempVethName, err = getVethName()
-		if err != nil {
-			return err
-		}
+	// We always read this as it is a way to sync with the parent as well
+	tempVethName, err := getVethName()
+	if err != nil {
+		return err
 	}
 
 	// close pipes so that we can replace it with the pty
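
Taken together, the execCommand and initCommand changes above also tighten the parent/child synchronization: sendVethName no longer closes the pipe, the parent always closes inPipe once the cgroups (and, optionally, the veth name) are in place, and the child always reads from the pipe, so hitting EOF is its signal to proceed. A stripped-down, standalone illustration of that pipe-as-barrier pattern; this is not the nsinit code, just the idea:

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"time"
)

func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}

	// "Child": blocks until the write end is closed, then continues.
	done := make(chan struct{})
	go func() {
		data, _ := ioutil.ReadAll(r) // returns at EOF, i.e. when the parent closes w
		fmt.Printf("child resumes with %q\n", data)
		close(done)
	}()

	// "Parent": finish setup before releasing the child.
	time.Sleep(100 * time.Millisecond) // stands in for cgroup setup etc.
	fmt.Fprint(w, "veth0abc")          // optional payload, like sendVethName
	w.Close()                          // closing the pipe is the sync point

	<-done
}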