Merge branch 'master' into pluginflag

Conflicts:
	pkg/cgroups/cgroups.go
	pkg/libcontainer/nsinit/exec.go
	pkg/libcontainer/nsinit/init.go
	pkg/libcontainer/nsinit/mount.go
	runconfig/hostconfig.go
	runconfig/parse.go
	runtime/execdriver/driver.go
	runtime/execdriver/lxc/lxc_template.go
	runtime/execdriver/lxc/lxc_template_unit_test.go
	runtime/execdriver/native/default_template.go
	runtime/execdriver/native/driver.go

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
Michael Crosby 2014-03-27 08:00:18 +00:00
commit 42be9fb9d2
17 changed files with 973 additions and 275 deletions

View file

@ -0,0 +1,15 @@
// +build !linux
package cgroups
import (
"fmt"
)
func useSystemd() bool {
return false
}
func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) {
return nil, fmt.Errorf("Systemd not supported")
}

189
cgroups/apply_raw.go Normal file
View file

@ -0,0 +1,189 @@
package cgroups
import (
"fmt"
"os"
"path/filepath"
"strconv"
)
type rawCgroup struct {
root string
cgroup string
}
func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) {
// We have two implementation of cgroups support, one is based on
// systemd and the dbus api, and one is based on raw cgroup fs operations
// following the pre-single-writer model docs at:
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
//
// we can pick any subsystem to find the root
cgroupRoot, err := FindCgroupMountpoint("cpu")
if err != nil {
return nil, err
}
cgroupRoot = filepath.Dir(cgroupRoot)
if _, err := os.Stat(cgroupRoot); err != nil {
return nil, fmt.Errorf("cgroups fs not found")
}
cgroup := c.Name
if c.Parent != "" {
cgroup = filepath.Join(c.Parent, cgroup)
}
raw := &rawCgroup{
root: cgroupRoot,
cgroup: cgroup,
}
if err := raw.setupDevices(c, pid); err != nil {
return nil, err
}
if err := raw.setupMemory(c, pid); err != nil {
return nil, err
}
if err := raw.setupCpu(c, pid); err != nil {
return nil, err
}
return raw, nil
}
func (raw *rawCgroup) path(subsystem string) (string, error) {
initPath, err := GetInitCgroupDir(subsystem)
if err != nil {
return "", err
}
return filepath.Join(raw.root, subsystem, initPath, raw.cgroup), nil
}
func (raw *rawCgroup) join(subsystem string, pid int) (string, error) {
path, err := raw.path(subsystem)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
return "", err
}
return path, nil
}
func (raw *rawCgroup) setupDevices(c *Cgroup, pid int) (err error) {
if !c.DeviceAccess {
dir, err := raw.join("devices", pid)
if err != nil {
return err
}
defer func() {
if err != nil {
os.RemoveAll(dir)
}
}()
if err := writeFile(dir, "devices.deny", "a"); err != nil {
return err
}
allow := []string{
// /dev/null, zero, full
"c 1:3 rwm",
"c 1:5 rwm",
"c 1:7 rwm",
// consoles
"c 5:1 rwm",
"c 5:0 rwm",
"c 4:0 rwm",
"c 4:1 rwm",
// /dev/urandom,/dev/random
"c 1:9 rwm",
"c 1:8 rwm",
// /dev/pts/ - pts namespaces are "coming soon"
"c 136:* rwm",
"c 5:2 rwm",
// tuntap
"c 10:200 rwm",
}
for _, val := range allow {
if err := writeFile(dir, "devices.allow", val); err != nil {
return err
}
}
}
return nil
}
func (raw *rawCgroup) setupMemory(c *Cgroup, pid int) (err error) {
if c.Memory != 0 || c.MemorySwap != 0 {
dir, err := raw.join("memory", pid)
if err != nil {
return err
}
defer func() {
if err != nil {
os.RemoveAll(dir)
}
}()
if c.Memory != 0 {
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
return err
}
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
return err
}
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if c.MemorySwap != -1 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil {
return err
}
}
}
return nil
}
func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
dir, err := raw.join("cpu", pid)
if err != nil {
return err
}
if c.CpuShares != 0 {
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil {
return err
}
}
return nil
}
func (raw *rawCgroup) Cleanup() error {
get := func(subsystem string) string {
path, _ := raw.path(subsystem)
return path
}
for _, path := range []string{
get("memory"),
get("devices"),
get("cpu"),
} {
if path != "" {
os.RemoveAll(path)
}
}
return nil
}

158
cgroups/apply_systemd.go Normal file
View file

@ -0,0 +1,158 @@
// +build linux
package cgroups
import (
"fmt"
systemd1 "github.com/coreos/go-systemd/dbus"
"github.com/dotcloud/docker/pkg/systemd"
"github.com/godbus/dbus"
"path/filepath"
"strings"
"sync"
)
type systemdCgroup struct {
}
var (
connLock sync.Mutex
theConn *systemd1.Conn
hasStartTransientUnit bool
)
func useSystemd() bool {
if !systemd.SdBooted() {
return false
}
connLock.Lock()
defer connLock.Unlock()
if theConn == nil {
var err error
theConn, err = systemd1.New()
if err != nil {
return false
}
// Assume we have StartTransientUnit
hasStartTransientUnit = true
// But if we get UnknownMethod error we don't
if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
hasStartTransientUnit = false
}
}
}
}
return hasStartTransientUnit
}
type DeviceAllow struct {
Node string
Permissions string
}
func getIfaceForUnit(unitName string) string {
if strings.HasSuffix(unitName, ".scope") {
return "Scope"
}
if strings.HasSuffix(unitName, ".service") {
return "Service"
}
return "Unit"
}
func systemdApply(c *Cgroup, pid int) (ActiveCgroup, error) {
unitName := c.Parent + "-" + c.Name + ".scope"
slice := "system.slice"
var properties []systemd1.Property
for _, v := range c.UnitProperties {
switch v[0] {
case "Slice":
slice = v[1]
default:
return nil, fmt.Errorf("Unknown unit propery %s", v[0])
}
}
properties = append(properties,
systemd1.Property{"Slice", dbus.MakeVariant(slice)},
systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})})
if !c.DeviceAccess {
properties = append(properties,
systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")},
systemd1.Property{"DeviceAllow", dbus.MakeVariant([]DeviceAllow{
{"/dev/null", "rwm"},
{"/dev/zero", "rwm"},
{"/dev/full", "rwm"},
{"/dev/random", "rwm"},
{"/dev/urandom", "rwm"},
{"/dev/tty", "rwm"},
{"/dev/console", "rwm"},
{"/dev/tty0", "rwm"},
{"/dev/tty1", "rwm"},
{"/dev/pts/ptmx", "rwm"},
// There is no way to add /dev/pts/* here atm, so we hack this manually below
// /dev/pts/* (how to add this?)
// Same with tuntap, which doesn't exist as a node most of the time
})})
}
if c.Memory != 0 {
properties = append(properties,
systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
}
// TODO: MemorySwap not available in systemd
if c.CpuShares != 0 {
properties = append(properties,
systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
return nil, err
}
// To work around the lack of /dev/pts/* support above we need to manually add these
// so, ask systemd for the cgroup used
props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
if err != nil {
return nil, err
}
cgroup := props["ControlGroup"].(string)
if !c.DeviceAccess {
mountpoint, err := FindCgroupMountpoint("devices")
if err != nil {
return nil, err
}
path := filepath.Join(mountpoint, cgroup)
// /dev/pts/*
if err := writeFile(path, "devices.allow", "c 136:* rwm"); err != nil {
return nil, err
}
// tuntap
if err := writeFile(path, "devices.allow", "c 10:200 rwm"); err != nil {
return nil, err
}
}
return &systemdCgroup{}, nil
}
func (c *systemdCgroup) Cleanup() error {
// systemd cleans up, we don't need to do anything
return nil
}

View file

@ -8,7 +8,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
)
@ -21,6 +20,12 @@ type Cgroup struct {
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
UnitProperties [][2]string `json:"unit_properties,omitempty"` // systemd unit properties
}
type ActiveCgroup interface {
Cleanup() error
}
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
@ -63,49 +68,6 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return parseCgroupFile(subsystem, f)
}
func (c *Cgroup) Path(root, subsystem string) (string, error) {
cgroup := c.Name
if c.Parent != "" {
cgroup = filepath.Join(c.Parent, cgroup)
}
initPath, err := GetInitCgroupDir(subsystem)
if err != nil {
return "", err
}
return filepath.Join(root, subsystem, initPath, cgroup), nil
}
func (c *Cgroup) Join(root, subsystem string, pid int) (string, error) {
path, err := c.Path(root, subsystem)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
return "", err
}
return path, nil
}
func (c *Cgroup) Cleanup(root string) error {
get := func(subsystem string) string {
path, _ := c.Path(root, subsystem)
return path
}
for _, path := range []string{
get("memory"),
get("devices"),
get("cpu"),
get("cpuset"),
} {
os.RemoveAll(path)
}
return nil
}
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
s := bufio.NewScanner(r)
for s.Scan() {
@ -127,131 +89,17 @@ func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func (c *Cgroup) Apply(pid int) error {
func (c *Cgroup) Apply(pid int) (ActiveCgroup, error) {
// We have two implementation of cgroups support, one is based on
// systemd and the dbus api, and one is based on raw cgroup fs operations
// following the pre-single-writer model docs at:
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
//
// we can pick any subsystem to find the root
cgroupRoot, err := FindCgroupMountpoint("cpu")
if err != nil {
return err
if useSystemd() {
return systemdApply(c, pid)
} else {
return rawApply(c, pid)
}
cgroupRoot = filepath.Dir(cgroupRoot)
if _, err := os.Stat(cgroupRoot); err != nil {
return fmt.Errorf("cgroups fs not found")
}
if err := c.setupDevices(cgroupRoot, pid); err != nil {
return err
}
if err := c.setupMemory(cgroupRoot, pid); err != nil {
return err
}
if err := c.setupCpu(cgroupRoot, pid); err != nil {
return err
}
if err := c.setupCpuset(cgroupRoot, pid); err != nil {
return err
}
return nil
}
func (c *Cgroup) setupDevices(cgroupRoot string, pid int) (err error) {
if !c.DeviceAccess {
dir, err := c.Join(cgroupRoot, "devices", pid)
if err != nil {
return err
}
defer func() {
if err != nil {
os.RemoveAll(dir)
}
}()
if err := writeFile(dir, "devices.deny", "a"); err != nil {
return err
}
allow := []string{
// /dev/null, zero, full
"c 1:3 rwm",
"c 1:5 rwm",
"c 1:7 rwm",
// consoles
"c 5:1 rwm",
"c 5:0 rwm",
"c 4:0 rwm",
"c 4:1 rwm",
// /dev/urandom,/dev/random
"c 1:9 rwm",
"c 1:8 rwm",
// /dev/pts/ - pts namespaces are "coming soon"
"c 136:* rwm",
"c 5:2 rwm",
// tuntap
"c 10:200 rwm",
}
for _, val := range allow {
if err := writeFile(dir, "devices.allow", val); err != nil {
return err
}
}
}
return nil
}
func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) {
if c.Memory != 0 || c.MemorySwap != 0 {
dir, err := c.Join(cgroupRoot, "memory", pid)
if err != nil {
return err
}
defer func() {
if err != nil {
os.RemoveAll(dir)
}
}()
if c.Memory != 0 {
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
return err
}
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil {
return err
}
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if c.MemorySwap != -1 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil {
return err
}
}
}
return nil
}
func (c *Cgroup) setupCpu(cgroupRoot string, pid int) (err error) {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
dir, err := c.Join(cgroupRoot, "cpu", pid)
if err != nil {
return err
}
if c.CpuShares != 0 {
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil {
return err
}
}
return nil
}
func (c *Cgroup) setupCpuset(cgroupRoot string, pid int) (err error) {

View file

@ -66,7 +66,6 @@ func (c *Chain) Forward(action Action, ip net.IP, port int, proto, dest_addr str
"-p", proto,
"-d", daddr,
"--dport", strconv.Itoa(port),
"!", "-i", c.Bridge,
"-j", "DNAT",
"--to-destination", net.JoinHostPort(dest_addr, strconv.Itoa(dest_port))); err != nil {
return err

23
label/label.go Normal file
View file

@ -0,0 +1,23 @@
// +build !selinux !linux
package label
func GenLabels(options string) (string, string, error) {
return "", "", nil
}
func FormatMountLabel(src string, MountLabel string) string {
return src
}
func SetProcessLabel(processLabel string) error {
return nil
}
func SetFileLabel(path string, fileLabel string) error {
return nil
}
func GetPidCon(pid int) (string, error) {
return "", nil
}

69
label/label_selinux.go Normal file
View file

@ -0,0 +1,69 @@
// +build selinux,linux
package label
import (
"fmt"
"github.com/dotcloud/docker/pkg/selinux"
"strings"
)
func GenLabels(options string) (string, string, error) {
processLabel, mountLabel := selinux.GetLxcContexts()
var err error
if processLabel == "" { // SELinux is disabled
return "", "", err
}
s := strings.Fields(options)
l := len(s)
if l > 0 {
pcon := selinux.NewContext(processLabel)
for i := 0; i < l; i++ {
o := strings.Split(s[i], "=")
pcon[o[0]] = o[1]
}
processLabel = pcon.Get()
mountLabel, err = selinux.CopyLevel(processLabel, mountLabel)
}
return processLabel, mountLabel, err
}
func FormatMountLabel(src string, MountLabel string) string {
var mountLabel string
if src != "" {
mountLabel = src
if MountLabel != "" {
mountLabel = fmt.Sprintf("%s,context=\"%s\"", mountLabel, MountLabel)
}
} else {
if MountLabel != "" {
mountLabel = fmt.Sprintf("context=\"%s\"", MountLabel)
}
}
return mountLabel
}
func SetProcessLabel(processLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setexeccon(processLabel)
}
return nil
}
func GetProcessLabel() (string, error) {
if selinux.SelinuxEnabled() {
return selinux.Getexeccon()
}
return "", nil
}
func SetFileLabel(path string, fileLabel string) error {
if selinux.SelinuxEnabled() && fileLabel != "" {
return selinux.Setfilecon(path, fileLabel)
}
return nil
}
func GetPidCon(pid int) (string, error) {
return selinux.Getpidcon(pid)
}

View file

@ -7,6 +7,7 @@ import (
"os/exec"
"syscall"
"github.com/dotcloud/docker/pkg/cgroups"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/system"
@ -62,10 +63,15 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [
// Do this before syncing with child so that no children
// can escape the cgroup
ns.logger.Println("setting cgroups")
if err := ns.SetupCgroups(container, command.Process.Pid); err != nil {
activeCgroup, err := ns.SetupCgroups(container, command.Process.Pid)
if err != nil {
command.Process.Kill()
return -1, err
}
if activeCgroup != nil {
defer activeCgroup.Cleanup()
}
ns.logger.Println("setting up network")
if err := ns.InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil {
command.Process.Kill()
@ -86,13 +92,11 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [
return status, err
}
func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) error {
func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) {
if container.Cgroups != nil {
if err := container.Cgroups.Apply(nspid); err != nil {
return err
}
return container.Cgroups.Apply(nspid)
}
return nil
return nil, nil
}
func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {

View file

@ -4,6 +4,7 @@ package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
"os"
@ -32,7 +33,11 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s
closeFds()
return -1, err
}
processLabel, err := label.GetPidCon(nspid)
if err != nil {
closeFds()
return -1, err
}
// foreach namespace fd, use setns to join an existing container's namespaces
for _, fd := range fds {
if fd > 0 {
@ -80,6 +85,10 @@ dropAndExec:
if err := finalizeNamespace(container); err != nil {
return -1, err
}
err = label.SetProcessLabel(processLabel)
if err != nil {
return -1, err
}
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
return -1, err
}

View file

@ -5,8 +5,10 @@ package nsinit
import (
"fmt"
"os"
"runtime"
"syscall"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/apparmor"
"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
@ -61,7 +63,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
return fmt.Errorf("setup networking %s", err)
}
ns.logger.Println("setup mount namespace")
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if err := system.Sethostname(container.Hostname); err != nil {
@ -77,6 +79,10 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
return err
}
}
runtime.LockOSThread()
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
return fmt.Errorf("SetProcessLabel label %s", err)
}
ns.logger.Printf("execing %s\n", args[0])
return system.Execv(args[0], args[0:], container.Env)
}

View file

@ -4,6 +4,7 @@ package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
@ -20,7 +21,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD
//
// There is no need to unmount the new mounts because as soon as the mount namespace
// is no longer in use, the mounts will be removed automatically
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool) error {
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
flag := syscall.MS_PRIVATE
if noPivotRoot {
flag = syscall.MS_SLAVE
@ -31,7 +32,7 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
}
if err := mountSystem(rootfs); err != nil {
if err := mountSystem(rootfs, mountLabel); err != nil {
return fmt.Errorf("mount system %s", err)
}
@ -59,7 +60,7 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons
if err := setupDev(rootfs); err != nil {
return err
}
if err := setupPtmx(rootfs, console); err != nil {
if err := setupPtmx(rootfs, console, mountLabel); err != nil {
return err
}
if err := system.Chdir(rootfs); err != nil {
@ -197,7 +198,7 @@ func setupDev(rootfs string) error {
}
// setupConsole ensures that the container has a proper /dev/console setup
func setupConsole(rootfs, console string) error {
func setupConsole(rootfs, console string, mountLabel string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
@ -221,6 +222,9 @@ func setupConsole(rootfs, console string) error {
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
return fmt.Errorf("mknod %s %s", dest, err)
}
if err := label.SetFileLabel(console, mountLabel); err != nil {
return fmt.Errorf("SetFileLabel Failed %s %s", dest, err)
}
if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", console, dest, err)
}
@ -229,7 +233,7 @@ func setupConsole(rootfs, console string) error {
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
// inside the mount namespace
func mountSystem(rootfs string) error {
func mountSystem(rootfs string, mountLabel string) error {
for _, m := range []struct {
source string
path string
@ -239,8 +243,8 @@ func mountSystem(rootfs string) error {
}{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: "mode=1777,size=65536k"},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1755,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
} {
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("mkdirall %s %s", m.path, err)
@ -254,7 +258,7 @@ func mountSystem(rootfs string) error {
// setupPtmx adds a symlink to pts/ptmx for /dev/ptmx and
// finishes setting up /dev/console
func setupPtmx(rootfs, console string) error {
func setupPtmx(rootfs, console string, mountLabel string) error {
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
@ -263,7 +267,7 @@ func setupPtmx(rootfs, console string) error {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != "" {
if err := setupConsole(rootfs, console); err != nil {
if err := setupConsole(rootfs, console, mountLabel); err != nil {
return err
}
}

387
selinux/selinux.go Normal file
View file

@ -0,0 +1,387 @@
package selinux
import (
"bufio"
"crypto/rand"
"encoding/binary"
"fmt"
"github.com/dotcloud/docker/pkg/mount"
"github.com/dotcloud/docker/pkg/system"
"io"
"os"
"regexp"
"strconv"
"strings"
"syscall"
)
const (
Enforcing = 1
Permissive = 0
Disabled = -1
selinuxDir = "/etc/selinux/"
selinuxConfig = selinuxDir + "config"
selinuxTypeTag = "SELINUXTYPE"
selinuxTag = "SELINUX"
selinuxPath = "/sys/fs/selinux"
xattrNameSelinux = "security.selinux"
stRdOnly = 0x01
)
var (
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
spaceRegex = regexp.MustCompile(`^([^=]+) (.*)$`)
mcsList = make(map[string]bool)
selinuxfs = "unknown"
selinuxEnabled = false
selinuxEnabledChecked = false
)
type SELinuxContext map[string]string
func GetSelinuxMountPoint() string {
if selinuxfs != "unknown" {
return selinuxfs
}
selinuxfs = ""
mounts, err := mount.GetMounts()
if err != nil {
return selinuxfs
}
for _, mount := range mounts {
if mount.Fstype == "selinuxfs" {
selinuxfs = mount.Mountpoint
break
}
}
if selinuxfs != "" {
var buf syscall.Statfs_t
syscall.Statfs(selinuxfs, &buf)
if (buf.Flags & stRdOnly) == 1 {
selinuxfs = ""
}
}
return selinuxfs
}
func SelinuxEnabled() bool {
if selinuxEnabledChecked {
return selinuxEnabled
}
selinuxEnabledChecked = true
if fs := GetSelinuxMountPoint(); fs != "" {
if con, _ := Getcon(); con != "kernel" {
selinuxEnabled = true
}
}
return selinuxEnabled
}
func ReadConfig(target string) (value string) {
var (
val, key string
bufin *bufio.Reader
)
in, err := os.Open(selinuxConfig)
if err != nil {
return ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err != io.EOF {
return ""
}
done = true
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == target {
return strings.Trim(val, "\"")
}
}
}
return ""
}
func GetSELinuxPolicyRoot() string {
return selinuxDir + ReadConfig(selinuxTypeTag)
}
func readCon(name string) (string, error) {
var val string
in, err := os.Open(name)
if err != nil {
return "", err
}
defer in.Close()
_, err = fmt.Fscanf(in, "%s", &val)
return val, err
}
func Setfilecon(path string, scon string) error {
return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0)
}
func Getfilecon(path string) (string, error) {
var scon []byte
cnt, err := syscall.Getxattr(path, xattrNameSelinux, scon)
scon = make([]byte, cnt)
cnt, err = syscall.Getxattr(path, xattrNameSelinux, scon)
return string(scon), err
}
func Setfscreatecon(scon string) error {
return writeCon("/proc/self/attr/fscreate", scon)
}
func Getfscreatecon() (string, error) {
return readCon("/proc/self/attr/fscreate")
}
func Getcon() (string, error) {
return readCon("/proc/self/attr/current")
}
func Getpidcon(pid int) (string, error) {
return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
}
func Getexeccon() (string, error) {
return readCon("/proc/self/attr/exec")
}
func writeCon(name string, val string) error {
if !SelinuxEnabled() {
return nil
}
out, err := os.OpenFile(name, os.O_WRONLY, 0)
if err != nil {
return err
}
defer out.Close()
if val != "" {
_, err = out.Write([]byte(val))
} else {
_, err = out.Write(nil)
}
return err
}
func Setexeccon(scon string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), scon)
}
func (c SELinuxContext) Get() string {
return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
}
func NewContext(scon string) SELinuxContext {
c := make(SELinuxContext)
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
c["user"] = con[0]
c["role"] = con[1]
c["type"] = con[2]
c["level"] = con[3]
}
return c
}
func SelinuxGetEnforce() int {
var enforce int
enforceS, err := readCon(fmt.Sprintf("%s/enforce", selinuxPath))
if err != nil {
return -1
}
enforce, err = strconv.Atoi(string(enforceS))
if err != nil {
return -1
}
return enforce
}
func SelinuxGetEnforceMode() int {
switch ReadConfig(selinuxTag) {
case "enforcing":
return Enforcing
case "permissive":
return Permissive
}
return Disabled
}
func mcsAdd(mcs string) {
mcsList[mcs] = true
}
func mcsDelete(mcs string) {
mcsList[mcs] = false
}
func mcsExists(mcs string) bool {
return mcsList[mcs]
}
func IntToMcs(id int, catRange uint32) string {
var (
SETSIZE = int(catRange)
TIER = SETSIZE
ORD = id
)
if id < 1 || id > 523776 {
return ""
}
for ORD > TIER {
ORD = ORD - TIER
TIER -= 1
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
}
func uniqMcs(catRange uint32) string {
var (
n uint32
c1, c2 uint32
mcs string
)
for {
binary.Read(rand.Reader, binary.LittleEndian, &n)
c1 = n % catRange
binary.Read(rand.Reader, binary.LittleEndian, &n)
c2 = n % catRange
if c1 == c2 {
continue
} else {
if c1 > c2 {
t := c1
c1 = c2
c2 = t
}
}
mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
if mcsExists(mcs) {
continue
}
mcsAdd(mcs)
break
}
return mcs
}
func FreeContext(con string) {
if con != "" {
scon := NewContext(con)
mcsDelete(scon["level"])
}
}
func GetLxcContexts() (processLabel string, fileLabel string) {
var (
val, key string
bufin *bufio.Reader
)
if !SelinuxEnabled() {
return "", ""
}
lxcPath := fmt.Sprintf("%s/content/lxc_contexts", GetSELinuxPolicyRoot())
fileLabel = "system_u:object_r:svirt_sandbox_file_t:s0"
processLabel = "system_u:system_r:svirt_lxc_net_t:s0"
in, err := os.Open(lxcPath)
if err != nil {
goto exit
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err == io.EOF {
done = true
} else {
goto exit
}
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == "process" {
processLabel = strings.Trim(val, "\"")
}
if key == "file" {
fileLabel = strings.Trim(val, "\"")
}
}
}
exit:
mcs := IntToMcs(os.Getpid(), 1024)
scon := NewContext(processLabel)
scon["level"] = mcs
processLabel = scon.Get()
scon = NewContext(fileLabel)
scon["level"] = mcs
fileLabel = scon.Get()
return processLabel, fileLabel
}
func SecurityCheckContext(val string) error {
return writeCon(fmt.Sprintf("%s.context", selinuxPath), val)
}
func CopyLevel(src, dest string) (string, error) {
if !SelinuxEnabled() {
return "", nil
}
if src == "" {
return "", nil
}
if err := SecurityCheckContext(src); err != nil {
return "", err
}
if err := SecurityCheckContext(dest); err != nil {
return "", err
}
scon := NewContext(src)
tcon := NewContext(dest)
tcon["level"] = scon["level"]
return tcon.Get(), nil
}

64
selinux/selinux_test.go Normal file
View file

@ -0,0 +1,64 @@
package selinux_test
import (
"github.com/dotcloud/docker/pkg/selinux"
"os"
"testing"
)
func testSetfilecon(t *testing.T) {
if selinux.SelinuxEnabled() {
tmp := "selinux_test"
out, _ := os.OpenFile(tmp, os.O_WRONLY, 0)
out.Close()
err := selinux.Setfilecon(tmp, "system_u:object_r:bin_t:s0")
if err == nil {
t.Log(selinux.Getfilecon(tmp))
} else {
t.Log("Setfilecon failed")
t.Fatal(err)
}
os.Remove(tmp)
}
}
func TestSELinux(t *testing.T) {
var (
err error
plabel, flabel string
)
if selinux.SelinuxEnabled() {
t.Log("Enabled")
plabel, flabel = selinux.GetLxcContexts()
t.Log(plabel)
t.Log(flabel)
plabel, flabel = selinux.GetLxcContexts()
t.Log(plabel)
t.Log(flabel)
t.Log("getenforce ", selinux.SelinuxGetEnforce())
t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode())
pid := os.Getpid()
t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
t.Log(selinux.Getcon())
t.Log(selinux.Getfilecon("/etc/passwd"))
err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0")
if err == nil {
t.Log(selinux.Getfscreatecon())
} else {
t.Log("setfscreatecon failed", err)
t.Fatal(err)
}
err = selinux.Setfscreatecon("")
if err == nil {
t.Log(selinux.Getfscreatecon())
} else {
t.Log("setfscreatecon failed", err)
t.Fatal(err)
}
t.Log(selinux.Getpidcon(1))
t.Log(selinux.GetSelinuxMountPoint())
} else {
t.Log("Disabled")
}
}

View file

@ -1,55 +0,0 @@
/*
Copyright 2013 CoreOS Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package activation implements primitives for systemd socket activation.
package activation
import (
"os"
"strconv"
"syscall"
)
// based on: https://gist.github.com/alberts/4640792
const (
listenFdsStart = 3
)
func Files(unsetEnv bool) []*os.File {
if unsetEnv {
// there is no way to unset env in golang os package for now
// https://code.google.com/p/go/issues/detail?id=6423
defer os.Setenv("LISTEN_PID", "")
defer os.Setenv("LISTEN_FDS", "")
}
pid, err := strconv.Atoi(os.Getenv("LISTEN_PID"))
if err != nil || pid != os.Getpid() {
return nil
}
nfds, err := strconv.Atoi(os.Getenv("LISTEN_FDS"))
if err != nil || nfds == 0 {
return nil
}
var files []*os.File
for fd := listenFdsStart; fd < listenFdsStart+nfds; fd++ {
syscall.CloseOnExec(fd)
files = append(files, os.NewFile(uintptr(fd), "LISTEN_FD_"+strconv.Itoa(fd)))
}
return files
}

View file

@ -1,37 +0,0 @@
/*
Copyright 2014 CoreOS Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package activation
import (
"fmt"
"net"
)
// Listeners returns net.Listeners for all socket activated fds passed to this process.
func Listeners(unsetEnv bool) ([]net.Listener, error) {
files := Files(unsetEnv)
listeners := make([]net.Listener, len(files))
for i, f := range files {
var err error
listeners[i], err = net.FileListener(f)
if err != nil {
return nil, fmt.Errorf("Error setting up FileListener for fd %d: %s", f.Fd(), err.Error())
}
}
return listeners, nil
}

15
systemd/booted.go Normal file
View file

@ -0,0 +1,15 @@
package systemd
import (
"os"
)
// Conversion to Go of systemd's sd_booted()
func SdBooted() bool {
s, err := os.Stat("/run/systemd/system")
if err != nil {
return false
}
return s.IsDir()
}

View file

@ -5,7 +5,7 @@ import (
"net"
"strconv"
"github.com/dotcloud/docker/pkg/systemd/activation"
"github.com/coreos/go-systemd/activation"
)
// ListenFD returns the specified socket activated files as a slice of