libcontainer: Initial version of cgroups support
This is a minimal version of raw cgroup support for libcontainer. It has only enough for what docker needs, and it has no support for systemd yet. Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
parent
8590435fa0
commit
3de41b34a2
6 changed files with 218 additions and 10 deletions
|
@ -40,6 +40,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
|
|||
return parseCgroupFile(subsystem, f)
|
||||
}
|
||||
|
||||
func GetInitCgroupDir(subsystem string) (string, error) {
|
||||
f, err := os.Open("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseCgroupFile(subsystem, f)
|
||||
}
|
||||
|
||||
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
|
||||
|
@ -49,8 +59,10 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
|||
}
|
||||
text := s.Text()
|
||||
parts := strings.Split(text, ":")
|
||||
if parts[1] == subsystem {
|
||||
return parts[2], nil
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
if subs == subsystem {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)
|
||||
|
|
177
libcontainer/cgroup/cgroup.go
Normal file
177
libcontainer/cgroup/cgroup.go
Normal file
|
@ -0,0 +1,177 @@
|
|||
package cgroup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/cgroups"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// We have two implementations of cgroups support: one is based on
|
||||
// systemd and the dbus api, and one is based on raw cgroup fs operations
|
||||
// following the pre-single-writer model docs at:
|
||||
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
|
||||
// cgroupRoot is the directory under which the per-subsystem cgroup
// hierarchies are looked up (cgroupRoot/<subsystem>/...).
const cgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
// useSystemd reports whether cgroups should be managed through systemd
// rather than by writing to the cgroup filesystem directly. The systemd
// backend is not implemented yet, so this is always false.
func useSystemd() bool {
	return false
}
|
||||
|
||||
func applyCgroupSystemd(container *libcontainer.Container, pid int) error {
|
||||
return fmt.Errorf("not supported yet")
|
||||
}
|
||||
|
||||
// writeFile writes data to the file named file inside dir, replacing any
// previous content. The file is created with mode 0700 if it does not
// exist yet. The error from ioutil.WriteFile is returned unchanged.
func writeFile(dir, file, data string) error {
	target := filepath.Join(dir, file)
	payload := []byte(data)
	return ioutil.WriteFile(target, payload, 0700)
}
|
||||
|
||||
func getCgroup(subsystem string, container *libcontainer.Container) (string, error) {
|
||||
cgroup := container.CgroupName
|
||||
if container.CgroupParent != "" {
|
||||
cgroup = filepath.Join(container.CgroupParent, cgroup)
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup)
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) {
|
||||
path, err := getCgroup(subsystem, container)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) {
|
||||
if _, err := os.Stat(cgroupRoot); err != nil {
|
||||
return fmt.Errorf("cgroups fs not found")
|
||||
}
|
||||
|
||||
if !container.DeviceAccess {
|
||||
dir, err := joinCgroup("devices", container, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := writeFile(dir, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
allow := []string{
|
||||
// /dev/null, zero, full
|
||||
"c 1:3 rwm",
|
||||
"c 1:5 rwm",
|
||||
"c 1:7 rwm",
|
||||
|
||||
// consoles
|
||||
"c 5:1 rwm",
|
||||
"c 5:0 rwm",
|
||||
"c 4:0 rwm",
|
||||
"c 4:1 rwm",
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
"c 1:9 rwm",
|
||||
"c 1:8 rwm",
|
||||
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
"c 136:* rwm",
|
||||
"c 5:2 rwm",
|
||||
|
||||
// tuntap
|
||||
"c 10:200 rwm",
|
||||
}
|
||||
|
||||
for _, val := range allow {
|
||||
if err := writeFile(dir, "devices.allow", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if container.Memory != 0 || container.MemorySwap != 0 {
|
||||
dir, err := joinCgroup("memory", container, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if container.Memory != 0 {
|
||||
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if container.MemorySwap != 0 {
|
||||
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
dir, err := joinCgroup("cpu", container, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if container.CpuShares != 0 {
|
||||
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func CleanupCgroup(container *libcontainer.Container) error {
|
||||
path, _ := getCgroup("memory", container)
|
||||
os.RemoveAll(path)
|
||||
path, _ = getCgroup("devices", container)
|
||||
os.RemoveAll(path)
|
||||
path, _ = getCgroup("cpu", container)
|
||||
os.RemoveAll(path)
|
||||
return nil
|
||||
}
|
||||
|
||||
func ApplyCgroup(container *libcontainer.Container, pid int) error {
|
||||
if container.CgroupName == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
if useSystemd() {
|
||||
return applyCgroupSystemd(container, pid)
|
||||
} else {
|
||||
return applyCgroupRaw(container, pid)
|
||||
}
|
||||
}
|
|
@ -11,6 +11,13 @@ type Container struct {
|
|||
Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply
|
||||
Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop
|
||||
Network *Network `json:"network,omitempty"` // nil for host's network stack
|
||||
|
||||
CgroupName string `json:"cgroup_name,omitempty"` // name of cgroup
|
||||
CgroupParent string `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice
|
||||
DeviceAccess bool `json:"device_access,omitempty"` // if false, device access is restricted to a default whitelist via the devices cgroup
|
||||
Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes)
|
||||
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
|
||||
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
|
||||
}
|
||||
|
||||
// Network defines configuration for a container's networking stack
|
||||
|
|
|
@ -34,5 +34,8 @@
|
|||
"gateway": "172.17.42.1",
|
||||
"bridge": "docker0",
|
||||
"mtu": 1500
|
||||
}
|
||||
},
|
||||
"cgroup_name": "docker-koye",
|
||||
"cgroup_parent": "docker",
|
||||
"memory": 524800
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package main
|
|||
import (
|
||||
"fmt"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
||||
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
||||
"github.com/dotcloud/docker/pkg/system"
|
||||
|
@ -33,10 +34,18 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
|||
return -1, err
|
||||
}
|
||||
if err := writePidFile(command); err != nil {
|
||||
command.Process.Kill()
|
||||
return -1, err
|
||||
}
|
||||
defer deletePidFile()
|
||||
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil {
|
||||
command.Process.Kill()
|
||||
return -1, err
|
||||
}
|
||||
|
||||
if container.Network != nil {
|
||||
vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid)
|
||||
if err != nil {
|
||||
|
@ -45,6 +54,9 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
|||
sendVethName(vethPair, inPipe)
|
||||
}
|
||||
|
||||
// Sync with child
|
||||
inPipe.Close()
|
||||
|
||||
go io.Copy(os.Stdout, master)
|
||||
go io.Copy(master, os.Stdin)
|
||||
|
||||
|
@ -67,7 +79,6 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
|||
// pipe so that the child stops waiting for more data
|
||||
func sendVethName(name string, pipe io.WriteCloser) {
|
||||
fmt.Fprint(pipe, name)
|
||||
pipe.Close()
|
||||
}
|
||||
|
||||
// initializeContainerVeth will create a veth pair and setup the host's
|
||||
|
|
|
@ -20,12 +20,10 @@ func initCommand(container *libcontainer.Container, console string, args []strin
|
|||
return err
|
||||
}
|
||||
|
||||
var tempVethName string
|
||||
if container.Network != nil {
|
||||
tempVethName, err = getVethName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// We always read this as it is a way to sync with the parent as well
|
||||
tempVethName, err := getVethName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// close pipes so that we can replace it with the pty
|
||||
|
|
Loading…
Reference in a new issue