libcontainer: Initial version of cgroups support
This is a minimal implementation of raw cgroup support for libcontainer. It covers only what Docker currently needs and does not support systemd yet.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
parent
8590435fa0
commit
3de41b34a2
6 changed files with 218 additions and 10 deletions
|
@ -40,6 +40,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
|
||||||
return parseCgroupFile(subsystem, f)
|
return parseCgroupFile(subsystem, f)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetInitCgroupDir(subsystem string) (string, error) {
|
||||||
|
f, err := os.Open("/proc/1/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return parseCgroupFile(subsystem, f)
|
||||||
|
}
|
||||||
|
|
||||||
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
||||||
s := bufio.NewScanner(r)
|
s := bufio.NewScanner(r)
|
||||||
|
|
||||||
|
@ -49,8 +59,10 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
||||||
}
|
}
|
||||||
text := s.Text()
|
text := s.Text()
|
||||||
parts := strings.Split(text, ":")
|
parts := strings.Split(text, ":")
|
||||||
if parts[1] == subsystem {
|
for _, subs := range strings.Split(parts[1], ",") {
|
||||||
return parts[2], nil
|
if subs == subsystem {
|
||||||
|
return parts[2], nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)
|
return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)
|
||||||
|
|
177
libcontainer/cgroup/cgroup.go
Normal file
177
libcontainer/cgroup/cgroup.go
Normal file
|
@ -0,0 +1,177 @@
|
||||||
|
package cgroup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/dotcloud/docker/pkg/cgroups"
|
||||||
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Cgroup support is designed around two implementations: one based on
// systemd and its dbus api (currently only a stub), and one based on raw
// cgroup fs operations, following the pre-single-writer model docs at:
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
//
// cgroupRoot is the directory under which the raw implementation looks for
// the per-subsystem cgroup hierarchies.
const cgroupRoot = "/sys/fs/cgroup"
|
||||||
|
|
||||||
|
// useSystemd reports whether the systemd implementation should be used.
// It is hard-wired to false for now, so callers always take the raw
// cgroup fs path.
func useSystemd() bool {
	return false
}
|
||||||
|
|
||||||
|
func applyCgroupSystemd(container *libcontainer.Container, pid int) error {
|
||||||
|
return fmt.Errorf("not supported yet")
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeFile writes data to the file named file inside dir, replacing any
// previous content, and returns the error from the write (nil on success).
// It is the helper used for every cgroup control-file write in this package.
func writeFile(dir, file, data string) error {
	target := filepath.Join(dir, file)
	payload := []byte(data)
	return ioutil.WriteFile(target, payload, 0700)
}
|
||||||
|
|
||||||
|
func getCgroup(subsystem string, container *libcontainer.Container) (string, error) {
|
||||||
|
cgroup := container.CgroupName
|
||||||
|
if container.CgroupParent != "" {
|
||||||
|
cgroup = filepath.Join(container.CgroupParent, cgroup)
|
||||||
|
}
|
||||||
|
|
||||||
|
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup)
|
||||||
|
|
||||||
|
return path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) {
|
||||||
|
path, err := getCgroup(subsystem, container)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) {
|
||||||
|
if _, err := os.Stat(cgroupRoot); err != nil {
|
||||||
|
return fmt.Errorf("cgroups fs not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !container.DeviceAccess {
|
||||||
|
dir, err := joinCgroup("devices", container, pid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if retErr != nil {
|
||||||
|
os.RemoveAll(dir)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if err := writeFile(dir, "devices.deny", "a"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
allow := []string{
|
||||||
|
// /dev/null, zero, full
|
||||||
|
"c 1:3 rwm",
|
||||||
|
"c 1:5 rwm",
|
||||||
|
"c 1:7 rwm",
|
||||||
|
|
||||||
|
// consoles
|
||||||
|
"c 5:1 rwm",
|
||||||
|
"c 5:0 rwm",
|
||||||
|
"c 4:0 rwm",
|
||||||
|
"c 4:1 rwm",
|
||||||
|
|
||||||
|
// /dev/urandom,/dev/random
|
||||||
|
"c 1:9 rwm",
|
||||||
|
"c 1:8 rwm",
|
||||||
|
|
||||||
|
// /dev/pts/ - pts namespaces are "coming soon"
|
||||||
|
"c 136:* rwm",
|
||||||
|
"c 5:2 rwm",
|
||||||
|
|
||||||
|
// tuntap
|
||||||
|
"c 10:200 rwm",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, val := range allow {
|
||||||
|
if err := writeFile(dir, "devices.allow", val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.Memory != 0 || container.MemorySwap != 0 {
|
||||||
|
dir, err := joinCgroup("memory", container, pid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if retErr != nil {
|
||||||
|
os.RemoveAll(dir)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if container.Memory != 0 {
|
||||||
|
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if container.MemorySwap != 0 {
|
||||||
|
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||||
|
// on a container basis
|
||||||
|
dir, err := joinCgroup("cpu", container, pid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if container.CpuShares != 0 {
|
||||||
|
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func CleanupCgroup(container *libcontainer.Container) error {
|
||||||
|
path, _ := getCgroup("memory", container)
|
||||||
|
os.RemoveAll(path)
|
||||||
|
path, _ = getCgroup("devices", container)
|
||||||
|
os.RemoveAll(path)
|
||||||
|
path, _ = getCgroup("cpu", container)
|
||||||
|
os.RemoveAll(path)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ApplyCgroup(container *libcontainer.Container, pid int) error {
|
||||||
|
if container.CgroupName == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if useSystemd() {
|
||||||
|
return applyCgroupSystemd(container, pid)
|
||||||
|
} else {
|
||||||
|
return applyCgroupRaw(container, pid)
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,6 +11,13 @@ type Container struct {
|
||||||
Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply
|
Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply
|
||||||
Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop
|
Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop
|
||||||
Network *Network `json:"network,omitempty"` // nil for host's network stack
|
Network *Network `json:"network,omitempty"` // nil for host's network stack
|
||||||
|
|
||||||
|
CgroupName string `json:"cgroup_name,omitempty"` // name of cgroup
|
||||||
|
CgroupParent string `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice
|
||||||
|
DeviceAccess bool `json:"device_access,omitempty"` // true leaves device access unrestricted; false applies the devices cgroup whitelist
|
||||||
|
Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes)
|
||||||
|
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
|
||||||
|
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Network defines configuration for a container's networking stack
|
// Network defines configuration for a container's networking stack
|
||||||
|
|
|
@ -34,5 +34,8 @@
|
||||||
"gateway": "172.17.42.1",
|
"gateway": "172.17.42.1",
|
||||||
"bridge": "docker0",
|
"bridge": "docker0",
|
||||||
"mtu": 1500
|
"mtu": 1500
|
||||||
}
|
},
|
||||||
|
"cgroup_name": "docker-koye",
|
||||||
|
"cgroup_parent": "docker",
|
||||||
|
"memory": 524800
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ package main
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer"
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||||
|
"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
||||||
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
|
@ -33,10 +34,18 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
if err := writePidFile(command); err != nil {
|
if err := writePidFile(command); err != nil {
|
||||||
|
command.Process.Kill()
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
defer deletePidFile()
|
defer deletePidFile()
|
||||||
|
|
||||||
|
// Do this before syncing with child so that no children
|
||||||
|
// can escape the cgroup
|
||||||
|
if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil {
|
||||||
|
command.Process.Kill()
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
|
||||||
if container.Network != nil {
|
if container.Network != nil {
|
||||||
vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid)
|
vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -45,6 +54,9 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
||||||
sendVethName(vethPair, inPipe)
|
sendVethName(vethPair, inPipe)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sync with child
|
||||||
|
inPipe.Close()
|
||||||
|
|
||||||
go io.Copy(os.Stdout, master)
|
go io.Copy(os.Stdout, master)
|
||||||
go io.Copy(master, os.Stdin)
|
go io.Copy(master, os.Stdin)
|
||||||
|
|
||||||
|
@ -67,7 +79,6 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
|
||||||
// pipe so that the child stops waiting for more data
|
// pipe so that the child stops waiting for more data
|
||||||
func sendVethName(name string, pipe io.WriteCloser) {
|
func sendVethName(name string, pipe io.WriteCloser) {
|
||||||
fmt.Fprint(pipe, name)
|
fmt.Fprint(pipe, name)
|
||||||
pipe.Close()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// initializeContainerVeth will create a veth pair and setup the host's
|
// initializeContainerVeth will create a veth pair and setup the host's
|
||||||
|
|
|
@ -20,12 +20,10 @@ func initCommand(container *libcontainer.Container, console string, args []strin
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var tempVethName string
|
// We always read this as it is a way to sync with the parent as well
|
||||||
if container.Network != nil {
|
tempVethName, err := getVethName()
|
||||||
tempVethName, err = getVethName()
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// close pipes so that we can replace it with the pty
|
// close pipes so that we can replace it with the pty
|
||||||
|
|
Loading…
Reference in a new issue