Merge branch 'master' into pluginflag
Conflicts: pkg/cgroups/cgroups.go pkg/libcontainer/nsinit/exec.go pkg/libcontainer/nsinit/init.go pkg/libcontainer/nsinit/mount.go runconfig/hostconfig.go runconfig/parse.go runtime/execdriver/driver.go runtime/execdriver/lxc/lxc_template.go runtime/execdriver/lxc/lxc_template_unit_test.go runtime/execdriver/native/default_template.go runtime/execdriver/native/driver.go Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
commit
42be9fb9d2
17 changed files with 973 additions and 275 deletions
15
cgroups/apply_nosystemd.go
Normal file
15
cgroups/apply_nosystemd.go
Normal file
|
@ -0,0 +1,15 @@
|
|||
// +build !linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// useSystemd reports whether the systemd cgroup backend can be used.
// This variant is the one compiled under the "!linux" build constraint of
// this file, where systemd is never available, so it always answers false.
func useSystemd() bool {
	return false
}
|
||||
|
||||
func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
189
cgroups/apply_raw.go
Normal file
189
cgroups/apply_raw.go
Normal file
|
@ -0,0 +1,189 @@
|
|||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// rawCgroup is the handle produced by the raw (cgroup filesystem) backend.
// It remembers where the cgroup hierarchies live and which cgroup was used,
// so that the per-subsystem directories can be located again later.
type rawCgroup struct {
	// root is the directory that contains the per-subsystem hierarchies.
	root string
	// cgroup is the cgroup path (parent joined with name) that is appended
	// below each subsystem's init cgroup directory.
	cgroup string
}
|
||||
|
||||
func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) {
|
||||
// We have two implementation of cgroups support, one is based on
|
||||
// systemd and the dbus api, and one is based on raw cgroup fs operations
|
||||
// following the pre-single-writer model docs at:
|
||||
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
|
||||
//
|
||||
// we can pick any subsystem to find the root
|
||||
|
||||
cgroupRoot, err := FindCgroupMountpoint("cpu")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cgroupRoot = filepath.Dir(cgroupRoot)
|
||||
|
||||
if _, err := os.Stat(cgroupRoot); err != nil {
|
||||
return nil, fmt.Errorf("cgroups fs not found")
|
||||
}
|
||||
|
||||
cgroup := c.Name
|
||||
if c.Parent != "" {
|
||||
cgroup = filepath.Join(c.Parent, cgroup)
|
||||
}
|
||||
|
||||
raw := &rawCgroup{
|
||||
root: cgroupRoot,
|
||||
cgroup: cgroup,
|
||||
}
|
||||
|
||||
if err := raw.setupDevices(c, pid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := raw.setupMemory(c, pid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := raw.setupCpu(c, pid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) path(subsystem string) (string, error) {
|
||||
initPath, err := GetInitCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) join(subsystem string, pid int) (string, error) {
|
||||
path, err := raw.path(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) setupDevices(c *Cgroup, pid int) (err error) {
|
||||
if !c.DeviceAccess {
|
||||
dir, err := raw.join("devices", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := writeFile(dir, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
allow := []string{
|
||||
// /dev/null, zero, full
|
||||
"c 1:3 rwm",
|
||||
"c 1:5 rwm",
|
||||
"c 1:7 rwm",
|
||||
|
||||
// consoles
|
||||
"c 5:1 rwm",
|
||||
"c 5:0 rwm",
|
||||
"c 4:0 rwm",
|
||||
"c 4:1 rwm",
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
"c 1:9 rwm",
|
||||
"c 1:8 rwm",
|
||||
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
"c 136:* rwm",
|
||||
"c 5:2 rwm",
|
||||
|
||||
// tuntap
|
||||
"c 10:200 rwm",
|
||||
}
|
||||
|
||||
for _, val := range allow {
|
||||
if err := writeFile(dir, "devices.allow", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) setupMemory(c *Cgroup, pid int) (err error) {
|
||||
if c.Memory != 0 || c.MemorySwap != 0 {
|
||||
dir, err := raw.join("memory", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if c.Memory != 0 {
|
||||
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// By default, MemorySwap is set to twice the size of RAM.
|
||||
// If you want to omit MemorySwap, set it to `-1'.
|
||||
if c.MemorySwap != -1 {
|
||||
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
dir, err := raw.join("cpu", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c.CpuShares != 0 {
|
||||
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *rawCgroup) Cleanup() error {
|
||||
get := func(subsystem string) string {
|
||||
path, _ := raw.path(subsystem)
|
||||
return path
|
||||
}
|
||||
|
||||
for _, path := range []string{
|
||||
get("memory"),
|
||||
get("devices"),
|
||||
get("cpu"),
|
||||
} {
|
||||
if path != "" {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
158
cgroups/apply_systemd.go
Normal file
158
cgroups/apply_systemd.go
Normal file
|
@ -0,0 +1,158 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
systemd1 "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/dotcloud/docker/pkg/systemd"
|
||||
"github.com/godbus/dbus"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// systemdCgroup is the handle returned by the systemd backend. It carries no
// state of its own.
type systemdCgroup struct{}
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemd1.Conn
|
||||
hasStartTransientUnit bool
|
||||
)
|
||||
|
||||
func useSystemd() bool {
|
||||
if !systemd.SdBooted() {
|
||||
return false
|
||||
}
|
||||
|
||||
connLock.Lock()
|
||||
defer connLock.Unlock()
|
||||
|
||||
if theConn == nil {
|
||||
var err error
|
||||
theConn, err = systemd1.New()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Assume we have StartTransientUnit
|
||||
hasStartTransientUnit = true
|
||||
|
||||
// But if we get UnknownMethod error we don't
|
||||
if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
|
||||
hasStartTransientUnit = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
|
||||
// DeviceAllow is one entry of the "DeviceAllow" unit property that
// systemdApply sends to systemd: a device node and the access granted on it.
//
// The field order must not change: values are built positionally.
type DeviceAllow struct {
	// Node is the device node path, e.g. "/dev/null".
	Node string
	// Permissions is the access string, e.g. "rwm".
	Permissions string
}
|
||||
|
||||
// getIfaceForUnit maps a unit name to an interface name based on the unit's
// suffix: ".scope" gives "Scope", ".service" gives "Service", and any other
// name gives "Unit".
func getIfaceForUnit(unitName string) string {
	switch {
	case strings.HasSuffix(unitName, ".scope"):
		return "Scope"
	case strings.HasSuffix(unitName, ".service"):
		return "Service"
	default:
		return "Unit"
	}
}
|
||||
|
||||
func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) {
|
||||
unitName := c.Parent + "-" + c.Name + ".scope"
|
||||
slice := "system.slice"
|
||||
|
||||
var properties []systemd1.Property
|
||||
|
||||
for _, v := range c.UnitProperties {
|
||||
switch v[0] {
|
||||
case "Slice":
|
||||
slice = v[1]
|
||||
default:
|
||||
return nil, fmt.Errorf("Unknown unit propery %s", v[0])
|
||||
}
|
||||
}
|
||||
|
||||
properties = append(properties,
|
||||
systemd1.Property{"Slice", dbus.MakeVariant(slice)},
|
||||
systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
|
||||
systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})})
|
||||
|
||||
if !c.DeviceAccess {
|
||||
properties = append(properties,
|
||||
systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")},
|
||||
systemd1.Property{"DeviceAllow", dbus.MakeVariant([]DeviceAllow{
|
||||
{"/dev/null", "rwm"},
|
||||
{"/dev/zero", "rwm"},
|
||||
{"/dev/full", "rwm"},
|
||||
{"/dev/random", "rwm"},
|
||||
{"/dev/urandom", "rwm"},
|
||||
{"/dev/tty", "rwm"},
|
||||
{"/dev/console", "rwm"},
|
||||
{"/dev/tty0", "rwm"},
|
||||
{"/dev/tty1", "rwm"},
|
||||
{"/dev/pts/ptmx", "rwm"},
|
||||
// There is no way to add /dev/pts/* here atm, so we hack this manually below
|
||||
// /dev/pts/* (how to add this?)
|
||||
// Same with tuntap, which doesn't exist as a node most of the time
|
||||
})})
|
||||
}
|
||||
|
||||
if c.Memory != 0 {
|
||||
properties = append(properties,
|
||||
systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
|
||||
}
|
||||
// TODO: MemorySwap not available in systemd
|
||||
|
||||
if c.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
|
||||
}
|
||||
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// To work around the lack of /dev/pts/* support above we need to manually add these
|
||||
// so, ask systemd for the cgroup used
|
||||
props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroup := props["ControlGroup"].(string)
|
||||
|
||||
if !c.DeviceAccess {
|
||||
mountpoint, err := FindCgroupMountpoint("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
path := filepath.Join(mountpoint, cgroup)
|
||||
|
||||
// /dev/pts/*
|
||||
if err := writeFile(path, "devices.allow", "c 136:* rwm"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// tuntap
|
||||
if err := writeFile(path, "devices.allow", "c 10:200 rwm"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &systemdCgroup{}, nil
|
||||
}
|
||||
|
||||
func (c *systemdCgroup) Cleanup() error {
|
||||
// systemd cleans up, we don't need to do anything
|
||||
return nil
|
||||
}
|
|
@ -8,7 +8,6 @@ import (
|
|||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -21,6 +20,12 @@ type Cgroup struct {
|
|||
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
|
||||
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
|
||||
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
|
||||
|
||||
UnitProperties [][2]string `json:"unit_properties,omitempty"` // systemd unit properties
|
||||
}
|
||||
|
||||
type ActiveCgroup interface {
|
||||
Cleanup() error
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
|
||||
|
@ -63,49 +68,6 @@ func GetInitCgroupDir(subsystem string) (string, error) {
|
|||
return parseCgroupFile(subsystem, f)
|
||||
}
|
||||
|
||||
func (c *Cgroup) Path(root, subsystem string) (string, error) {
|
||||
cgroup := c.Name
|
||||
if c.Parent != "" {
|
||||
cgroup = filepath.Join(c.Parent, cgroup)
|
||||
}
|
||||
initPath, err := GetInitCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(root, subsystem, initPath, cgroup), nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) Join(root, subsystem string, pid int) (string, error) {
|
||||
path, err := c.Path(root, subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) Cleanup(root string) error {
|
||||
get := func(subsystem string) string {
|
||||
path, _ := c.Path(root, subsystem)
|
||||
return path
|
||||
}
|
||||
|
||||
for _, path := range []string{
|
||||
get("memory"),
|
||||
get("devices"),
|
||||
get("cpu"),
|
||||
get("cpuset"),
|
||||
} {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
for s.Scan() {
|
||||
|
@ -127,131 +89,17 @@ func writeFile(dir, file, data string) error {
|
|||
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
|
||||
}
|
||||
|
||||
func (c *Cgroup) Apply(pid int) error {
|
||||
func (c *Cgroup) Apply(pid int) (ActiveCgroup, error) {
|
||||
// We have two implementation of cgroups support, one is based on
|
||||
// systemd and the dbus api, and one is based on raw cgroup fs operations
|
||||
// following the pre-single-writer model docs at:
|
||||
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
|
||||
//
|
||||
// we can pick any subsystem to find the root
|
||||
cgroupRoot, err := FindCgroupMountpoint("cpu")
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
if useSystemd() {
|
||||
return systemdApply(c, pid)
|
||||
} else {
|
||||
return rawApply(c, pid)
|
||||
}
|
||||
cgroupRoot = filepath.Dir(cgroupRoot)
|
||||
|
||||
if _, err := os.Stat(cgroupRoot); err != nil {
|
||||
return fmt.Errorf("cgroups fs not found")
|
||||
}
|
||||
if err := c.setupDevices(cgroupRoot, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.setupMemory(cgroupRoot, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.setupCpu(cgroupRoot, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.setupCpuset(cgroupRoot, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) setupDevices(cgroupRoot string, pid int) (err error) {
|
||||
if !c.DeviceAccess {
|
||||
dir, err := c.Join(cgroupRoot, "devices", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := writeFile(dir, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
allow := []string{
|
||||
// /dev/null, zero, full
|
||||
"c 1:3 rwm",
|
||||
"c 1:5 rwm",
|
||||
"c 1:7 rwm",
|
||||
|
||||
// consoles
|
||||
"c 5:1 rwm",
|
||||
"c 5:0 rwm",
|
||||
"c 4:0 rwm",
|
||||
"c 4:1 rwm",
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
"c 1:9 rwm",
|
||||
"c 1:8 rwm",
|
||||
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
"c 136:* rwm",
|
||||
"c 5:2 rwm",
|
||||
|
||||
// tuntap
|
||||
"c 10:200 rwm",
|
||||
}
|
||||
|
||||
for _, val := range allow {
|
||||
if err := writeFile(dir, "devices.allow", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) {
|
||||
if c.Memory != 0 || c.MemorySwap != 0 {
|
||||
dir, err := c.Join(cgroupRoot, "memory", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
if c.Memory != 0 {
|
||||
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// By default, MemorySwap is set to twice the size of RAM.
|
||||
// If you want to omit MemorySwap, set it to `-1'.
|
||||
if c.MemorySwap != -1 {
|
||||
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) setupCpu(cgroupRoot string, pid int) (err error) {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
dir, err := c.Join(cgroupRoot, "cpu", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c.CpuShares != 0 {
|
||||
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cgroup) setupCpuset(cgroupRoot string, pid int) (err error) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue