Remove and vendor libcontainer

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
Michael Crosby 2014-06-10 19:49:57 -07:00
parent c9ed49398e
commit 8a186aa82b
82 changed files with 0 additions and 6936 deletions

View file

@ -1,4 +0,0 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
Guillaume J. Charmes <guillaume@docker.com> (@creack)
Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol)

View file

@ -1,37 +0,0 @@
## libcontainer - reference implementation for containers
#### background
libcontainer specifies configuration options for what a container is. It provides a native Go implementation
for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management.
#### container
A container is a self contained directory that is able to run one or more processes without
affecting the host system. The directory is usually a full system tree. Inside the directory
a `container.json` file is placed with the runtime configuration for how the processes
should be contained and ran. Environment, networking, and different capabilities for the
process are specified in this file. The configuration is used for each process executed inside the container.
See the `container.json` file for what the configuration should look like.
Using this configuration and the current directory holding the rootfs for a process, one can use libcontainer to exec the container. Running the life of the namespace, a `pid` file
is written to the current directory with the pid of the namespaced process to the external world. A client can use this pid to wait, kill, or perform other operation with the container. If a user tries to run a new process inside an existing container with a live namespace, the namespace will be joined by the new process.
You may also specify an alternate root place where the `container.json` file is read and where the `pid` file will be saved.
#### nsinit
`nsinit` is a cli application used as the reference implementation of libcontainer. It is able to
spawn or join new containers giving the current directory. To use `nsinit` cd into a Linux
rootfs and copy a `container.json` file into the directory with your specified configuration.
To execute `/bin/bash` in the current directory as a container just run:
```bash
nsinit exec /bin/bash
```
If you wish to spawn another process inside the container while your current bash session is
running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed.
You can identify if a process is running in a container by looking to see if `pid` is in the root of the directory.

View file

@ -1,11 +0,0 @@
#### goals
* small and simple - line count is not everything but less code is better
* provide primitives for working with namespaces not cater to every option
* extend via configuration not by features - host networking, no networking, veth network can be accomplished via adjusting the container.json, nothing to do with code
#### tasks
* reexec or raw syscalls for new process in existing container
* example configs for different setups (host networking, boot init)
* improve pkg documentation with comments
* testing - this is hard in a low level pkg but we could do some, maybe
* selinux

View file

@ -1,35 +0,0 @@
// +build apparmor,linux,amd64
package apparmor
// #cgo LDFLAGS: -lapparmor
// #include <sys/apparmor.h>
// #include <stdlib.h>
import "C"
import (
"io/ioutil"
"os"
"unsafe"
)
func IsEnabled() bool {
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
return err == nil && len(buf) > 1 && buf[0] == 'Y'
}
return false
}
func ApplyProfile(name string) error {
if name == "" {
return nil
}
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
if _, err := C.aa_change_onexec(cName); err != nil {
return err
}
return nil
}

View file

@ -1,11 +0,0 @@
// +build !apparmor !linux !amd64
package apparmor
func IsEnabled() bool {
return false
}
func ApplyProfile(name string) error {
return nil
}

View file

@ -1,94 +0,0 @@
package apparmor
import (
"io"
"os"
"text/template"
)
type data struct {
Name string
Imports []string
InnerImports []string
}
const baseTemplate = `
{{range $value := .Imports}}
{{$value}}
{{end}}
profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
{{range $value := .InnerImports}}
{{$value}}
{{end}}
network,
capability,
file,
umount,
mount fstype=tmpfs,
mount fstype=mqueue,
mount fstype=fuse.*,
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount options=(ro, remount) -> /,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
deny mount fstype=devpts,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/fs/cg[^r]*/** wklx,
deny /sys/firmware/efi/efivars/** rwklx,
deny /sys/kernel/security/** rwklx,
}
`
func generateProfile(out io.Writer) error {
compiled, err := template.New("apparmor_profile").Parse(baseTemplate)
if err != nil {
return err
}
data := &data{
Name: "docker-default",
}
if tuntablesExists() {
data.Imports = append(data.Imports, "#include <tunables/global>")
} else {
data.Imports = append(data.Imports, "@{PROC}=/proc/")
}
if abstrctionsEsists() {
data.InnerImports = append(data.InnerImports, "#include <abstractions/base>")
}
if err := compiled.Execute(out, data); err != nil {
return err
}
return nil
}
// check if the tunables/global exist
func tuntablesExists() bool {
_, err := os.Stat("/etc/apparmor.d/tunables/global")
return err == nil
}
// check if abstractions/base exist
func abstrctionsEsists() bool {
_, err := os.Stat("/etc/apparmor.d/abstractions/base")
return err == nil
}

View file

@ -1,44 +0,0 @@
package apparmor
import (
"fmt"
"os"
"os/exec"
"path"
)
const (
DefaultProfilePath = "/etc/apparmor.d/docker"
)
func InstallDefaultProfile() error {
if !IsEnabled() {
return nil
}
// Make sure /etc/apparmor.d exists
if err := os.MkdirAll(path.Dir(DefaultProfilePath), 0755); err != nil {
return err
}
f, err := os.OpenFile(DefaultProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return err
}
if err := generateProfile(f); err != nil {
f.Close()
return err
}
f.Close()
cmd := exec.Command("/sbin/apparmor_parser", "-r", "-W", "docker")
// to use the parser directly we have to make sure we are in the correct
// dir with the profile
cmd.Dir = "/etc/apparmor.d"
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("Error loading docker apparmor profile: %s (%s)", err, output)
}
return nil
}

View file

@ -1,40 +0,0 @@
package cgroups
import (
"errors"
"github.com/dotcloud/docker/pkg/libcontainer/devices"
)
var (
ErrNotFound = errors.New("mountpoint not found")
)
type FreezerState string
const (
Undefined FreezerState = ""
Frozen FreezerState = "FROZEN"
Thawed FreezerState = "THAWED"
)
type Cgroup struct {
Name string `json:"name,omitempty"`
Parent string `json:"parent,omitempty"` // name of parent cgroup or slice
AllowAllDevices bool `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowedDevices []*devices.Device `json:"allowed_devices,omitempty"`
Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes)
MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
}
type ActiveCgroup interface {
Cleanup() error
}

View file

@ -1,27 +0,0 @@
package cgroups
import (
"bytes"
"testing"
)
const (
cgroupsContents = `11:hugetlb:/
10:perf_event:/
9:blkio:/
8:net_cls:/
7:freezer:/
6:devices:/
5:memory:/
4:cpuacct,cpu:/
3:cpuset:/
2:name=systemd:/user.slice/user-1000.slice/session-16.scope`
)
func TestParseCgroups(t *testing.T) {
r := bytes.NewBuffer([]byte(cgroupsContents))
_, err := parseCgroupFile("blkio", r)
if err != nil {
t.Fatal(err)
}
}

View file

@ -1,180 +0,0 @@
package fs
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
var (
subsystems = map[string]subsystem{
"devices": &devicesGroup{},
"memory": &memoryGroup{},
"cpu": &cpuGroup{},
"cpuset": &cpusetGroup{},
"cpuacct": &cpuacctGroup{},
"blkio": &blkioGroup{},
"perf_event": &perfEventGroup{},
"freezer": &freezerGroup{},
}
)
type subsystem interface {
Set(*data) error
Remove(*data) error
GetStats(*data, *cgroups.Stats) error
}
type data struct {
root string
cgroup string
c *cgroups.Cgroup
pid int
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
d, err := getCgroupData(c, pid)
if err != nil {
return nil, err
}
for _, sys := range subsystems {
if err := sys.Set(d); err != nil {
d.Cleanup()
return nil, err
}
}
return d, nil
}
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
stats := cgroups.NewStats()
d, err := getCgroupData(c, 0)
if err != nil {
return nil, err
}
for _, sys := range subsystems {
if err := sys.GetStats(d, stats); err != nil {
return nil, err
}
}
return stats, nil
}
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
d, err := getCgroupData(c, 0)
if err != nil {
return err
}
c.Freezer = state
freezer := subsystems["freezer"]
return freezer.Set(d)
}
func GetPids(c *cgroups.Cgroup) ([]int, error) {
d, err := getCgroupData(c, 0)
if err != nil {
return nil, err
}
dir, err := d.path("devices")
if err != nil {
return nil, err
}
return cgroups.ReadProcsFile(dir)
}
func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) {
// we can pick any subsystem to find the root
cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu")
if err != nil {
return nil, err
}
cgroupRoot = filepath.Dir(cgroupRoot)
if _, err := os.Stat(cgroupRoot); err != nil {
return nil, fmt.Errorf("cgroups fs not found")
}
cgroup := c.Name
if c.Parent != "" {
cgroup = filepath.Join(c.Parent, cgroup)
}
return &data{
root: cgroupRoot,
cgroup: cgroup,
c: c,
pid: pid,
}, nil
}
func (raw *data) parent(subsystem string) (string, error) {
initPath, err := cgroups.GetInitCgroupDir(subsystem)
if err != nil {
return "", err
}
return filepath.Join(raw.root, subsystem, initPath), nil
}
func (raw *data) path(subsystem string) (string, error) {
parent, err := raw.parent(subsystem)
if err != nil {
return "", err
}
return filepath.Join(parent, raw.cgroup), nil
}
func (raw *data) join(subsystem string) (string, error) {
path, err := raw.path(subsystem)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil {
return "", err
}
return path, nil
}
func (raw *data) Cleanup() error {
for _, sys := range subsystems {
sys.Remove(raw)
}
return nil
}
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func readFile(dir, file string) (string, error) {
data, err := ioutil.ReadFile(filepath.Join(dir, file))
return string(data), err
}
func removePath(p string, err error) error {
if err != nil {
return err
}
if p != "" {
return os.RemoveAll(p)
}
return nil
}

View file

@ -1,142 +0,0 @@
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type blkioGroup struct {
}
func (s *blkioGroup) Set(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("blkio"); err != nil && err != cgroups.ErrNotFound {
return err
}
return nil
}
func (s *blkioGroup) Remove(d *data) error {
return removePath(d.path("blkio"))
}
/*
examples:
blkio.sectors
8:0 6792
blkio.io_service_bytes
8:0 Read 1282048
8:0 Write 2195456
8:0 Sync 2195456
8:0 Async 1282048
8:0 Total 3477504
Total 3477504
blkio.io_serviced
8:0 Read 124
8:0 Write 104
8:0 Sync 104
8:0 Async 124
8:0 Total 228
Total 228
blkio.io_queued
8:0 Read 0
8:0 Write 0
8:0 Sync 0
8:0 Async 0
8:0 Total 0
Total 0
*/
func splitBlkioStatLine(r rune) bool {
return r == ' ' || r == ':'
}
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
var blkioStats []cgroups.BlkioStatEntry
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
// format: dev type amount
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
if len(fields) < 3 {
if len(fields) == 2 && fields[0] == "Total" {
// skip total line
continue
} else {
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
}
}
v, err := strconv.ParseUint(fields[0], 10, 64)
if err != nil {
return nil, err
}
major := v
v, err = strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return nil, err
}
minor := v
op := ""
valueField := 2
if len(fields) == 4 {
op = fields[2]
valueField = 3
}
v, err = strconv.ParseUint(fields[valueField], 10, 64)
if err != nil {
return nil, err
}
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
}
return blkioStats, nil
}
func (s *blkioGroup) GetStats(d *data, stats *cgroups.Stats) error {
var blkioStats []cgroups.BlkioStatEntry
var err error
path, err := d.path("blkio")
if err != nil {
return err
}
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
return err
}
stats.BlkioStats.SectorsRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
return err
}
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
return err
}
stats.BlkioStats.IoServicedRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
return err
}
stats.BlkioStats.IoQueuedRecursive = blkioStats
return nil
}

View file

@ -1,174 +0,0 @@
package fs
import (
"testing"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
const (
sectorsRecursiveContents = `8:0 1024`
serviceBytesRecursiveContents = `8:0 Read 100
8:0 Write 200
8:0 Sync 300
8:0 Async 500
8:0 Total 500
Total 500`
servicedRecursiveContents = `8:0 Read 10
8:0 Write 40
8:0 Sync 20
8:0 Async 30
8:0 Total 50
Total 50`
queuedRecursiveContents = `8:0 Read 1
8:0 Write 4
8:0 Sync 2
8:0 Async 3
8:0 Total 5
Total 5`
)
var actualStats = *cgroups.NewStats()
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
}
func TestBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioStatsNoSectorsFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsNoServicedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsNoQueuedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read Write",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &blkioGroup{}
err := blkio.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}

View file

@ -1,78 +0,0 @@
package fs
import (
"bufio"
"os"
"path/filepath"
"strconv"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type cpuGroup struct {
}
func (s *cpuGroup) Set(d *data) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
dir, err := d.join("cpu")
if err != nil {
return err
}
if d.c.CpuShares != 0 {
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil {
return err
}
}
if d.c.CpuPeriod != 0 {
if err := writeFile(dir, "cpu.cfs_period_us", strconv.FormatInt(d.c.CpuPeriod, 10)); err != nil {
return err
}
}
if d.c.CpuQuota != 0 {
if err := writeFile(dir, "cpu.cfs_quota_us", strconv.FormatInt(d.c.CpuQuota, 10)); err != nil {
return err
}
}
return nil
}
func (s *cpuGroup) Remove(d *data) error {
return removePath(d.path("cpu"))
}
func (s *cpuGroup) GetStats(d *data, stats *cgroups.Stats) error {
path, err := d.path("cpu")
if err != nil {
return err
}
f, err := os.Open(filepath.Join(path, "cpu.stat"))
if err != nil {
if pathErr, ok := err.(*os.PathError); ok && pathErr.Err == syscall.ENOENT {
return nil
}
return err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
t, v, err := getCgroupParamKeyValue(sc.Text())
if err != nil {
return err
}
switch t {
case "nr_periods":
stats.CpuStats.ThrottlingData.Periods = v
case "nr_throttled":
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
case "throttled_time":
stats.CpuStats.ThrottlingData.ThrottledTime = v
}
}
return nil
}

View file

@ -1,66 +0,0 @@
package fs
import (
"fmt"
"testing"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
func TestCpuStats(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
kNrPeriods = 2000
kNrThrottled = 200
kThrottledTime = uint64(18446744073709551615)
)
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
kNrPeriods, kNrThrottled, kThrottledTime)
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &cpuGroup{}
err := cpu.GetStats(helper.CgroupData, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.ThrottlingData{
Periods: kNrPeriods,
ThrottledPeriods: kNrThrottled,
ThrottledTime: kThrottledTime}
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
}
func TestNoCpuStatFile(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpu := &cpuGroup{}
err := cpu.GetStats(helper.CgroupData, &actualStats)
if err != nil {
t.Fatal("Expected not to fail, but did")
}
}
func TestInvalidCpuStat(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpuStatContent := `nr_periods 2000
nr_throttled 200
throttled_time fortytwo`
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &cpuGroup{}
err := cpu.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failed stat parsing.")
}
}

View file

@ -1,162 +0,0 @@
package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
"github.com/dotcloud/docker/pkg/system"
)
var (
cpuCount = uint64(runtime.NumCPU())
clockTicks = uint64(system.GetClockTicks())
)
const nanosecondsInSecond = 1000000000
type cpuacctGroup struct {
}
func (s *cpuacctGroup) Set(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("cpuacct"); err != nil && err != cgroups.ErrNotFound {
return err
}
return nil
}
func (s *cpuacctGroup) Remove(d *data) error {
return removePath(d.path("cpuacct"))
}
func (s *cpuacctGroup) GetStats(d *data, stats *cgroups.Stats) error {
var (
startCpu, lastCpu, startSystem, lastSystem, startUsage, lastUsage, kernelModeUsage, userModeUsage, percentage uint64
)
path, err := d.path("cpuacct")
if kernelModeUsage, userModeUsage, err = s.getCpuUsage(d, path); err != nil {
return err
}
startCpu = kernelModeUsage + userModeUsage
if startSystem, err = s.getSystemCpuUsage(d); err != nil {
return err
}
startUsageTime := time.Now()
if startUsage, err = getCgroupParamInt(path, "cpuacct.usage"); err != nil {
return err
}
// sample for 100ms
time.Sleep(100 * time.Millisecond)
if kernelModeUsage, userModeUsage, err = s.getCpuUsage(d, path); err != nil {
return err
}
lastCpu = kernelModeUsage + userModeUsage
if lastSystem, err = s.getSystemCpuUsage(d); err != nil {
return err
}
usageSampleDuration := time.Since(startUsageTime)
if lastUsage, err = getCgroupParamInt(path, "cpuacct.usage"); err != nil {
return err
}
var (
deltaProc = lastCpu - startCpu
deltaSystem = lastSystem - startSystem
deltaUsage = lastUsage - startUsage
)
if deltaSystem > 0.0 {
percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount
}
// NOTE: a percentage over 100% is valid for POSIX because that means the
// processes is using multiple cores
stats.CpuStats.CpuUsage.PercentUsage = percentage
// Delta usage is in nanoseconds of CPU time so get the usage (in cores) over the sample time.
stats.CpuStats.CpuUsage.CurrentUsage = deltaUsage / uint64(usageSampleDuration.Nanoseconds())
percpuUsage, err := s.getPercpuUsage(path)
if err != nil {
return err
}
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
stats.CpuStats.CpuUsage.UsageInKernelmode = (kernelModeUsage * nanosecondsInSecond) / clockTicks
stats.CpuStats.CpuUsage.UsageInUsermode = (userModeUsage * nanosecondsInSecond) / clockTicks
return nil
}
// TODO(vmarmol): Use cgroups stats.
func (s *cpuacctGroup) getSystemCpuUsage(d *data) (uint64, error) {
f, err := os.Open("/proc/stat")
if err != nil {
return 0, err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
parts := strings.Fields(sc.Text())
switch parts[0] {
case "cpu":
if len(parts) < 8 {
return 0, fmt.Errorf("invalid number of cpu fields")
}
var total uint64
for _, i := range parts[1:8] {
v, err := strconv.ParseUint(i, 10, 64)
if err != nil {
return 0.0, fmt.Errorf("Unable to convert value %s to int: %s", i, err)
}
total += v
}
return total, nil
default:
continue
}
}
return 0, fmt.Errorf("invalid stat format")
}
func (s *cpuacctGroup) getCpuUsage(d *data, path string) (uint64, uint64, error) {
kernelModeUsage := uint64(0)
userModeUsage := uint64(0)
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.stat"))
if err != nil {
return 0, 0, err
}
fields := strings.Fields(string(data))
if len(fields) != 4 {
return 0, 0, fmt.Errorf("Failure - %s is expected to have 4 fields", filepath.Join(path, "cpuacct.stat"))
}
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
return 0, 0, err
}
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
return 0, 0, err
}
return kernelModeUsage, userModeUsage, nil
}
func (s *cpuacctGroup) getPercpuUsage(path string) ([]uint64, error) {
percpuUsage := []uint64{}
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
if err != nil {
return percpuUsage, err
}
for _, value := range strings.Fields(string(data)) {
value, err := strconv.ParseUint(value, 10, 64)
if err != nil {
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
}
percpuUsage = append(percpuUsage, value)
}
return percpuUsage, nil
}

View file

@ -1,110 +0,0 @@
package fs
import (
"bytes"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type cpusetGroup struct {
}
func (s *cpusetGroup) Set(d *data) error {
// we don't want to join this cgroup unless it is specified
if d.c.CpusetCpus != "" {
dir, err := d.path("cpuset")
if err != nil {
return err
}
if err := s.ensureParent(dir); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(d.pid)); err != nil {
return err
}
if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil {
return err
}
}
return nil
}
func (s *cpusetGroup) Remove(d *data) error {
return removePath(d.path("cpuset"))
}
func (s *cpusetGroup) GetStats(d *data, stats *cgroups.Stats) error {
return nil
}
func (s *cpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
return
}
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
return
}
return cpus, mems, nil
}
// ensureParent ensures that the parent directory of current is created
// with the proper cpus and mems files copied from it's parent if the values
// are a file with a new line char
func (s *cpusetGroup) ensureParent(current string) error {
parent := filepath.Dir(current)
if _, err := os.Stat(parent); err != nil {
if !os.IsNotExist(err) {
return err
}
if err := s.ensureParent(parent); err != nil {
return err
}
}
if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return s.copyIfNeeded(current, parent)
}
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
func (s *cpusetGroup) copyIfNeeded(current, parent string) error {
var (
err error
currentCpus, currentMems []byte
parentCpus, parentMems []byte
)
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
return err
}
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
return err
}
if s.isEmpty(currentCpus) {
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err
}
}
if s.isEmpty(currentMems) {
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err
}
}
return nil
}
func (s *cpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}

View file

@ -1,34 +0,0 @@
package fs
import "github.com/dotcloud/docker/pkg/libcontainer/cgroups"
type devicesGroup struct {
}
func (s *devicesGroup) Set(d *data) error {
dir, err := d.join("devices")
if err != nil {
return err
}
if !d.c.AllowAllDevices {
if err := writeFile(dir, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range d.c.AllowedDevices {
if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
return err
}
}
}
return nil
}
func (s *devicesGroup) Remove(d *data) error {
return removePath(d.path("devices"))
}
func (s *devicesGroup) GetStats(d *data, stats *cgroups.Stats) error {
return nil
}

View file

@ -1,71 +0,0 @@
package fs
import (
"io/ioutil"
"path/filepath"
"strings"
"time"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type freezerGroup struct {
}
func (s *freezerGroup) Set(d *data) error {
switch d.c.Freezer {
case cgroups.Frozen, cgroups.Thawed:
dir, err := d.path("freezer")
if err != nil {
return err
}
if err := writeFile(dir, "freezer.state", string(d.c.Freezer)); err != nil {
return err
}
for {
state, err := readFile(dir, "freezer.state")
if err != nil {
return err
}
if strings.TrimSpace(state) == string(d.c.Freezer) {
break
}
time.Sleep(1 * time.Millisecond)
}
default:
if _, err := d.join("freezer"); err != nil && err != cgroups.ErrNotFound {
return err
}
}
return nil
}
func (s *freezerGroup) Remove(d *data) error {
return removePath(d.path("freezer"))
}
func getFreezerFileData(path string) (string, error) {
data, err := ioutil.ReadFile(path)
return strings.TrimSuffix(string(data), "\n"), err
}
func (s *freezerGroup) GetStats(d *data, stats *cgroups.Stats) error {
path, err := d.path("freezer")
if err != nil {
return err
}
var data string
if data, err = getFreezerFileData(filepath.Join(path, "freezer.parent_freezing")); err != nil {
return err
}
stats.FreezerStats.ParentState = data
if data, err = getFreezerFileData(filepath.Join(path, "freezer.self_freezing")); err != nil {
return err
}
stats.FreezerStats.SelfState = data
return nil
}

View file

@ -1,94 +0,0 @@
package fs
import (
"bufio"
"os"
"path/filepath"
"strconv"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type memoryGroup struct {
}
func (s *memoryGroup) Set(d *data) error {
dir, err := d.join("memory")
// only return an error for memory if it was not specified
if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) {
return err
}
defer func() {
if err != nil {
os.RemoveAll(dir)
}
}()
// Only set values if some config was specified.
if d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0 {
if d.c.Memory != 0 {
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil {
return err
}
}
if d.c.MemoryReservation != 0 {
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.MemoryReservation, 10)); err != nil {
return err
}
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if d.c.MemorySwap != -1 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil {
return err
}
}
}
return nil
}
func (s *memoryGroup) Remove(d *data) error {
return removePath(d.path("memory"))
}
func (s *memoryGroup) GetStats(d *data, stats *cgroups.Stats) error {
path, err := d.path("memory")
if err != nil {
return err
}
// Set stats from memory.stat.
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
if err != nil {
return err
}
defer statsFile.Close()
sc := bufio.NewScanner(statsFile)
for sc.Scan() {
t, v, err := getCgroupParamKeyValue(sc.Text())
if err != nil {
return err
}
stats.MemoryStats.Stats[t] = v
}
// Set memory usage and max historical usage.
value, err := getCgroupParamInt(path, "memory.usage_in_bytes")
if err != nil {
return err
}
stats.MemoryStats.Usage = value
value, err = getCgroupParamInt(path, "memory.max_usage_in_bytes")
if err != nil {
return err
}
stats.MemoryStats.MaxUsage = value
value, err = getCgroupParamInt(path, "memory.failcnt")
if err != nil {
return err
}
stats.MemoryStats.Failcnt = value
return nil
}

View file

@ -1,127 +0,0 @@
package fs
import (
"testing"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
const (
memoryStatContents = `cache 512
rss 1024`
memoryUsageContents = "2048\n"
memoryMaxUsageContents = "4096\n"
memoryFailcnt = "100\n"
)
func TestMemoryStats(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.failcnt": memoryFailcnt,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.MemoryStats{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Stats: map[string]uint64{"cache": 512, "rss": 1024}}
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
}
func TestMemoryStatsNoStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": "rss rss",
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": "bad",
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": "bad",
})
memory := &memoryGroup{}
err := memory.GetStats(helper.CgroupData, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}

View file

@ -1,24 +0,0 @@
package fs
import (
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
type perfEventGroup struct {
}
func (s *perfEventGroup) Set(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("perf_event"); err != nil && err != cgroups.ErrNotFound {
return err
}
return nil
}
func (s *perfEventGroup) Remove(d *data) error {
return removePath(d.path("perf_event"))
}
func (s *perfEventGroup) GetStats(d *data, stats *cgroups.Stats) error {
return nil
}

View file

@ -1,73 +0,0 @@
package fs
import (
"fmt"
"log"
"testing"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
if len(expected) != len(actual) {
return fmt.Errorf("blkioStatEntries length do not match")
}
for i, expValue := range expected {
actValue := actual[i]
if expValue != actValue {
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
}
}
return nil
}
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
log.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
log.Printf("blkio IoServicedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
log.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
log.Printf("blkio SectorsRecursive do not match - %s\n", err)
t.Fail()
}
}
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
if expected != actual {
log.Printf("Expected throttling data %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
if expected.Usage != actual.Usage {
log.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
t.Fail()
}
if expected.MaxUsage != actual.MaxUsage {
log.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
t.Fail()
}
for key, expValue := range expected.Stats {
actValue, ok := actual.Stats[key]
if !ok {
log.Printf("Expected memory stat key %s not found\n", key)
t.Fail()
}
if expValue != actValue {
log.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
t.Fail()
}
}
}

View file

@ -1,60 +0,0 @@
/*
Utility for testing cgroup operations.
Creates a mock of the cgroup filesystem for the duration of the test.
*/
package fs
import (
"fmt"
"io/ioutil"
"os"
"testing"
)
type cgroupTestUtil struct {
// data to use in tests.
CgroupData *data
// Path to the mock cgroup directory.
CgroupPath string
// Temporary directory to store mock cgroup filesystem.
tempDir string
t *testing.T
}
// Creates a new test util for the specified subsystem
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
d := &data{}
tempDir, err := ioutil.TempDir("", fmt.Sprintf("%s_cgroup_test", subsystem))
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testCgroupPath, err := d.path(subsystem)
if err != nil {
t.Fatal(err)
}
// Ensure the full mock cgroup path exists.
err = os.MkdirAll(testCgroupPath, 0755)
if err != nil {
t.Fatal(err)
}
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
}
func (c *cgroupTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents on the mock of the specified cgroup files.
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.CgroupPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View file

@ -1,40 +0,0 @@
package fs
import (
"errors"
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)
// Parses a cgroup param and returns as name, value
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
func getCgroupParamKeyValue(t string) (string, uint64, error) {
parts := strings.Fields(t)
switch len(parts) {
case 2:
value, err := strconv.ParseUint(parts[1], 10, 64)
if err != nil {
return "", 0, fmt.Errorf("Unable to convert param value to uint64: %s", err)
}
return parts[0], value, nil
default:
return "", 0, ErrNotValidFormat
}
}
// Gets a single int64 value from the specified cgroup file.
func getCgroupParamInt(cgroupPath, cgroupFile string) (uint64, error) {
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
if err != nil {
return 0, err
}
return strconv.ParseUint(strings.TrimSpace(string(contents)), 10, 64)
}

View file

@ -1,68 +0,0 @@
package fs
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
)
const (
cgroupFile = "cgroup.file"
floatValue = 2048.0
floatString = "2048"
)
func TestGetCgroupParamsInt(t *testing.T) {
// Setup tempdir.
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, cgroupFile)
// Success.
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
if err != nil {
t.Fatal(err)
}
value, err := getCgroupParamInt(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %f to equal %f", value, floatValue)
}
// Success with new line.
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamInt(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %f to equal %f", value, floatValue)
}
// Not a float.
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamInt(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
// Unknown file.
err = os.Remove(tempFile)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamInt(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
}

View file

@ -1,72 +0,0 @@
package cgroups
type ThrottlingData struct {
// Number of periods with throttling active
Periods uint64 `json:"periods,omitempty"`
// Number of periods when the container hit its throttling limit.
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
// Aggregate time the container was throttled for in nanoseconds.
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
type CpuUsage struct {
// percentage of available CPUs currently being used.
PercentUsage uint64 `json:"percent_usage,omitempty"`
// nanoseconds of cpu time consumed over the last 100 ms.
CurrentUsage uint64 `json:"current_usage,omitempty"`
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
// Time spent by tasks of the cgroup in kernel mode. Units: nanoseconds.
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
// Time spent by tasks of the cgroup in user mode. Units: nanoseconds.
UsageInUsermode uint64 `json:"usage_in_usermode"`
}
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throlling_data,omitempty"`
}
type MemoryStats struct {
// current res_counter usage for memory
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// TODO(vishh): Export these as stronger types.
// all the stats exported via memory.stat.
Stats map[string]uint64 `json:"stats,omitempty"`
// number of times memory usage hits limits.
Failcnt uint64 `json:"failcnt"`
}
type BlkioStatEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}
type BlkioStats struct {
// number of bytes tranferred to and from the block device
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recusrive,omitempty"`
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
// TODO(Vishh): Remove freezer from stats since it does not logically belong in stats.
type FreezerStats struct {
ParentState string `json:"parent_state,omitempty"`
SelfState string `json:"self_state,omitempty"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
FreezerStats FreezerStats `json:"freezer_stats,omitempty"`
}
func NewStats() *Stats {
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
return &Stats{MemoryStats: memoryStats}
}

View file

@ -1,25 +0,0 @@
// +build !linux
package systemd
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
func UseSystemd() bool {
return false
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func GetPids(c *cgroups.Cgroup) ([]int, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
return fmt.Errorf("Systemd not supported")
}

View file

@ -1,391 +0,0 @@
// +build linux
package systemd
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
systemd1 "github.com/coreos/go-systemd/dbus"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
"github.com/dotcloud/docker/pkg/systemd"
"github.com/godbus/dbus"
)
type systemdCgroup struct {
cleanupDirs []string
}
var (
connLock sync.Mutex
theConn *systemd1.Conn
hasStartTransientUnit bool
)
func UseSystemd() bool {
if !systemd.SdBooted() {
return false
}
connLock.Lock()
defer connLock.Unlock()
if theConn == nil {
var err error
theConn, err = systemd1.New()
if err != nil {
return false
}
// Assume we have StartTransientUnit
hasStartTransientUnit = true
// But if we get UnknownMethod error we don't
if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
hasStartTransientUnit = false
}
}
}
}
return hasStartTransientUnit
}
func getIfaceForUnit(unitName string) string {
if strings.HasSuffix(unitName, ".scope") {
return "Scope"
}
if strings.HasSuffix(unitName, ".service") {
return "Service"
}
return "Unit"
}
type cgroupArg struct {
File string
Value string
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
var (
unitName = getUnitName(c)
slice = "system.slice"
properties []systemd1.Property
cpuArgs []cgroupArg
cpusetArgs []cgroupArg
memoryArgs []cgroupArg
res systemdCgroup
)
// First set up things not supported by systemd
// -1 disables memorySwap
if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) {
memorySwap := c.MemorySwap
if memorySwap == 0 {
// By default, MemorySwap is set to twice the size of RAM.
memorySwap = c.Memory * 2
}
memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)})
}
if c.CpusetCpus != "" {
cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus})
}
if c.Slice != "" {
slice = c.Slice
}
properties = append(properties,
systemd1.Property{"Slice", dbus.MakeVariant(slice)},
systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
)
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
systemd1.Property{"MemoryAccounting", dbus.MakeVariant(true)},
systemd1.Property{"CPUAccounting", dbus.MakeVariant(true)},
systemd1.Property{"BlockIOAccounting", dbus.MakeVariant(true)})
if c.Memory != 0 {
properties = append(properties,
systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
}
// TODO: MemoryReservation and MemorySwap not available in systemd
if c.CpuShares != 0 {
properties = append(properties,
systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
return nil, err
}
// To work around the lack of /dev/pts/* support above we need to manually add these
// so, ask systemd for the cgroup used
props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
if err != nil {
return nil, err
}
cgroup := props["ControlGroup"].(string)
if !c.AllowAllDevices {
// Atm we can't use the systemd device support because of two missing things:
// * Support for wildcards to allow mknod on any device
// * Support for wildcards to allow /dev/pts support
//
// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is
// in wide use. When both these are availalable we will be able to switch, but need to keep the old
// implementation for backwards compat.
//
// Note: we can't use systemd to set up the initial limits, and then change the cgroup
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
// This happens at least for v208 when any sibling unit is started.
mountpoint, err := cgroups.FindCgroupMountpoint("devices")
if err != nil {
return nil, err
}
initPath, err := cgroups.GetInitCgroupDir("devices")
if err != nil {
return nil, err
}
dir := filepath.Join(mountpoint, initPath, c.Parent, c.Name)
res.cleanupDirs = append(res.cleanupDirs, dir)
if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) {
return nil, err
}
if err := ioutil.WriteFile(filepath.Join(dir, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil {
return nil, err
}
if err := writeFile(dir, "devices.deny", "a"); err != nil {
return nil, err
}
for _, dev := range c.AllowedDevices {
if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
return nil, err
}
}
}
if len(cpuArgs) != 0 {
mountpoint, err := cgroups.FindCgroupMountpoint("cpu")
if err != nil {
return nil, err
}
path := filepath.Join(mountpoint, cgroup)
for _, arg := range cpuArgs {
if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
return nil, err
}
}
}
if len(memoryArgs) != 0 {
mountpoint, err := cgroups.FindCgroupMountpoint("memory")
if err != nil {
return nil, err
}
path := filepath.Join(mountpoint, cgroup)
for _, arg := range memoryArgs {
if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
return nil, err
}
}
}
// we need to manually join the freezer cgroup in systemd because it does not currently support it
// via the dbus api
freezerPath, err := joinFreezer(c, pid)
if err != nil {
return nil, err
}
res.cleanupDirs = append(res.cleanupDirs, freezerPath)
if len(cpusetArgs) != 0 {
// systemd does not atm set up the cpuset controller, so we must manually
// join it. Additionally that is a very finicky controller where each
// level must have a full setup as the default for a new directory is "no cpus",
// so we avoid using any hierarchies here, creating a toplevel directory.
mountpoint, err := cgroups.FindCgroupMountpoint("cpuset")
if err != nil {
return nil, err
}
initPath, err := cgroups.GetInitCgroupDir("cpuset")
if err != nil {
return nil, err
}
var (
foundCpus bool
foundMems bool
rootPath = filepath.Join(mountpoint, initPath)
path = filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name)
)
res.cleanupDirs = append(res.cleanupDirs, path)
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return nil, err
}
for _, arg := range cpusetArgs {
if arg.File == "cpuset.cpus" {
foundCpus = true
}
if arg.File == "cpuset.mems" {
foundMems = true
}
if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
return nil, err
}
}
// These are required, if not specified inherit from parent
if !foundCpus {
s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus"))
if err != nil {
return nil, err
}
if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil {
return nil, err
}
}
// These are required, if not specified inherit from parent
if !foundMems {
s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems"))
if err != nil {
return nil, err
}
if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil {
return nil, err
}
}
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil {
return nil, err
}
}
return &res, nil
}
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func (c *systemdCgroup) Cleanup() error {
// systemd cleans up, we don't need to do much
for _, path := range c.cleanupDirs {
os.RemoveAll(path)
}
return nil
}
func joinFreezer(c *cgroups.Cgroup, pid int) (string, error) {
path, err := getFreezerPath(c)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil {
return "", err
}
return path, nil
}
func getFreezerPath(c *cgroups.Cgroup) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint("freezer")
if err != nil {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir("freezer")
if err != nil {
return "", err
}
return filepath.Join(mountpoint, initPath, fmt.Sprintf("%s-%s", c.Parent, c.Name)), nil
}
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
path, err := getFreezerPath(c)
if err != nil {
return err
}
if err := ioutil.WriteFile(filepath.Join(path, "freezer.state"), []byte(state), 0); err != nil {
return err
}
for {
state_, err := ioutil.ReadFile(filepath.Join(path, "freezer.state"))
if err != nil {
return err
}
if string(state) == string(bytes.TrimSpace(state_)) {
break
}
time.Sleep(1 * time.Millisecond)
}
return nil
}
func GetPids(c *cgroups.Cgroup) ([]int, error) {
unitName := getUnitName(c)
mountpoint, err := cgroups.FindCgroupMountpoint("cpu")
if err != nil {
return nil, err
}
props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
if err != nil {
return nil, err
}
cgroup := props["ControlGroup"].(string)
return cgroups.ReadProcsFile(filepath.Join(mountpoint, cgroup))
}
func getUnitName(c *cgroups.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
}

View file

@ -1,93 +0,0 @@
package cgroups
import (
"bufio"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/dotcloud/docker/pkg/mount"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mounts, err := mount.GetMounts()
if err != nil {
return "", err
}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
for _, opt := range strings.Split(mount.VfsOpts, ",") {
if opt == subsystem {
return mount.Mountpoint, nil
}
}
}
}
return "", ErrNotFound
}
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return parseCgroupFile(subsystem, f)
}
func GetInitCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return parseCgroupFile(subsystem, f)
}
func ReadProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
s := bufio.NewScanner(r)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
}
text := s.Text()
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
if subs == subsystem {
return parts[2], nil
}
}
}
return "", ErrNotFound
}

View file

@ -1,58 +0,0 @@
// +build linux
package console
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer/label"
"github.com/dotcloud/docker/pkg/system"
)
// Setup initializes the proper /dev/console inside the rootfs path
func Setup(rootfs, consolePath, mountLabel string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
if err := os.Chmod(consolePath, 0600); err != nil {
return err
}
if err := os.Chown(consolePath, 0, 0); err != nil {
return err
}
if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
return fmt.Errorf("set file label %s %s", consolePath, err)
}
dest := filepath.Join(rootfs, "dev/console")
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("create %s %s", dest, err)
}
if f != nil {
f.Close()
}
if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
}
return nil
}
func OpenAndDup(consolePath string) error {
slave, err := system.OpenTerminal(consolePath, syscall.O_RDWR)
if err != nil {
return fmt.Errorf("open terminal %s", err)
}
if err := system.Dup2(slave.Fd(), 0); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 1); err != nil {
return err
}
return system.Dup2(slave.Fd(), 2)
}

View file

@ -1,113 +0,0 @@
package libcontainer
import (
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
"github.com/dotcloud/docker/pkg/libcontainer/devices"
)
// Context is a generic key value pair that allows arbatrary data to be sent
type Context map[string]string
// Container defines configuration options for executing a process inside a contained environment
type Container struct {
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname,omitempty"`
// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writtable
ReadonlyFs bool `json:"readonly_fs,omitempty"`
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root,omitempty"`
// User will set the uid and gid of the executing process running inside the container
User string `json:"user,omitempty"`
// WorkingDir will change the processes current working directory inside the container's rootfs
WorkingDir string `json:"working_dir,omitempty"`
// Env will populate the processes environment with the provided values
// Any values from the parent processes will be cleared before the values
// provided in Env are provided to the process
Env []string `json:"environment,omitempty"`
// Tty when true will allocate a pty slave on the host for access by the container's process
// and ensure that it is mounted inside the container's rootfs
Tty bool `json:"tty,omitempty"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces map[string]bool `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities,omitempty"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks,omitempty"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes,omitempty"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"`
// Context is a generic key value format that allows for additional settings to be passed
// on the container's creation
// This is commonly used to specify apparmor profiles, selinux labels, and different restrictions
// placed on the container's processes
Context Context `json:"context,omitempty"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts Mounts `json:"mounts,omitempty"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
DeviceNodes []*devices.Device `json:"device_nodes,omitempty"`
}
// Network defines configuration for a container's networking stack
//
// The network configuration can be omited from a container causing the
// container to be setup with the host's networking stack
type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type,omitempty"`
// Context is a generic key value format for setting additional options that are specific to
// the network type
Context Context `json:"context,omitempty"`
// Address contains the IP and mask to set on the network interface
Address string `json:"address,omitempty"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway,omitempty"`
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
Mtu int `json:"mtu,omitempty"`
}
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and ommitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination,omitempty"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source,omitempty"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway,omitempty"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name,omitempty"`
}

View file

@ -1,107 +0,0 @@
{
"namespaces": {
"NEWNET": true,
"NEWPID": true,
"NEWIPC": true,
"NEWUTS": true,
"NEWNS": true
},
"networks": [
{
"gateway": "localhost",
"type": "loopback",
"address": "127.0.0.1/0",
"mtu": 1500
},
{
"gateway": "172.17.42.1",
"context": {
"prefix": "veth",
"bridge": "docker0"
},
"type": "veth",
"address": "172.17.42.2/16",
"mtu": 1500
}
],
"routes": [
{
"gateway": "172.17.42.1",
"interface_name": "eth0"
},
{
"destination": "192.168.0.0/24",
"interface_name": "eth0"
}
],
"capabilities": [
"MKNOD"
],
"cgroups": {
"name": "docker-koye",
"parent": "docker"
},
"hostname": "koye",
"environment": [
"HOME=/",
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
"container=docker",
"TERM=xterm-256color"
],
"tty": true,
"mounts": [
{
"type": "devtmpfs"
}
],
"device_nodes": [
{
"path": "/dev/null",
"type": 99,
"major_number": 1,
"minor_number": 3,
"cgroup_permissions": "rwm",
"file_mode": 438
},
{
"path": "/dev/zero",
"type": 99,
"major_number": 1,
"minor_number": 5,
"cgroup_permissions": "rwm",
"file_mode": 438
},
{
"path": "/dev/full",
"type": 99,
"major_number": 1,
"minor_number": 7,
"cgroup_permissions": "rwm",
"file_mode": 438
},
{
"path": "/dev/tty",
"type": 99,
"major_number": 5,
"minor_number": 0,
"cgroup_permissions": "rwm",
"file_mode": 438
},
{
"path": "/dev/urandom",
"type": 99,
"major_number": 1,
"minor_number": 9,
"cgroup_permissions": "rwm",
"file_mode": 438
},
{
"path": "/dev/random",
"type": 99,
"major_number": 1,
"minor_number": 8,
"cgroup_permissions": "rwm",
"file_mode": 438
}
]
}

View file

@ -1,69 +0,0 @@
package libcontainer
import (
"encoding/json"
"os"
"testing"
)
// Checks whether the expected capability is specified in the capabilities.
func contains(expected string, values []string) bool {
for _, v := range values {
if v == expected {
return true
}
}
return false
}
func TestContainerJsonFormat(t *testing.T) {
f, err := os.Open("container.json")
if err != nil {
t.Fatal("Unable to open container.json")
}
defer f.Close()
var container *Container
if err := json.NewDecoder(f).Decode(&container); err != nil {
t.Fatalf("failed to decode container config: %s", err)
}
if container.Hostname != "koye" {
t.Log("hostname is not set")
t.Fail()
}
if !container.Tty {
t.Log("tty should be set to true")
t.Fail()
}
if len(container.Routes) != 2 {
t.Log("should have found 2 routes")
t.Fail()
}
if !container.Namespaces["NEWNET"] {
t.Log("namespaces should contain NEWNET")
t.Fail()
}
if container.Namespaces["NEWUSER"] {
t.Log("namespaces should not contain NEWUSER")
t.Fail()
}
if contains("SYS_ADMIN", container.Capabilities) {
t.Log("SYS_ADMIN should not be enabled in capabilities mask")
t.Fail()
}
if !contains("MKNOD", container.Capabilities) {
t.Log("MKNOD should be enabled in capabilities mask")
t.Fail()
}
if contains("SYS_CHROOT", container.Capabilities) {
t.Log("capabilities mask should not contain SYS_CHROOT")
t.Fail()
}
}

View file

@ -1,159 +0,0 @@
package devices
var (
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
MajorNumber: 1,
MinorNumber: 3,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
MajorNumber: 1,
MinorNumber: 5,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
MajorNumber: 1,
MinorNumber: 7,
CgroupPermissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
MajorNumber: 5,
MinorNumber: 0,
CgroupPermissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
MajorNumber: 1,
MinorNumber: 9,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
MajorNumber: 1,
MinorNumber: 8,
CgroupPermissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
MajorNumber: Wildcard,
MinorNumber: Wildcard,
CgroupPermissions: "m",
},
{
Type: 'b',
MajorNumber: Wildcard,
MinorNumber: Wildcard,
CgroupPermissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
MajorNumber: 5,
MinorNumber: 1,
CgroupPermissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
MajorNumber: 4,
MinorNumber: 0,
CgroupPermissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
MajorNumber: 4,
MinorNumber: 1,
CgroupPermissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
MajorNumber: 136,
MinorNumber: Wildcard,
CgroupPermissions: "rwm",
},
{
Path: "",
Type: 'c',
MajorNumber: 5,
MinorNumber: 2,
CgroupPermissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
MajorNumber: 10,
MinorNumber: 200,
CgroupPermissions: "rwm",
},
/*// fuse
{
Path: "",
Type: 'c',
MajorNumber: 10,
MinorNumber: 229,
CgroupPermissions: "rwm",
},
// rtc
{
Path: "",
Type: 'c',
MajorNumber: 254,
MinorNumber: 0,
CgroupPermissions: "rwm",
},
*/
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work. Because java
// Insists on there being a /dev/fuse
// https://github.com/dotcloud/docker/issues/514
// https://github.com/dotcloud/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
MajorNumber: 10,
MinorNumber: 229,
CgroupPermissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View file

@ -1,119 +0,0 @@
package devices
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
const (
Wildcard = -1
)
var (
ErrNotADeviceNode = errors.New("not a device node")
)
type Device struct {
Type rune `json:"type,omitempty"`
Path string `json:"path,omitempty"` // It is fine if this is an empty string in the case that you are using Wildcards
MajorNumber int64 `json:"major_number,omitempty"` // Use the wildcard constant for wildcards.
MinorNumber int64 `json:"minor_number,omitempty"` // Use the wildcard constant for wildcards.
CgroupPermissions string `json:"cgroup_permissions,omitempty"` // Typically just "rwm"
FileMode os.FileMode `json:"file_mode,omitempty"` // The permission bits of the file's mode
}
func GetDeviceNumberString(deviceNumber int64) string {
if deviceNumber == Wildcard {
return "*"
} else {
return fmt.Sprintf("%d", deviceNumber)
}
}
func (device *Device) GetCgroupAllowString() string {
return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions)
}
// Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct.
func GetDevice(path string, cgroupPermissions string) (*Device, error) {
fileInfo, err := os.Stat(path)
if err != nil {
return nil, err
}
var (
devType rune
mode = fileInfo.Mode()
fileModePermissionBits = os.FileMode.Perm(mode)
)
switch {
case mode&os.ModeDevice == 0:
return nil, ErrNotADeviceNode
case mode&os.ModeCharDevice != 0:
fileModePermissionBits |= syscall.S_IFCHR
devType = 'c'
default:
fileModePermissionBits |= syscall.S_IFBLK
devType = 'b'
}
stat_t, ok := fileInfo.Sys().(*syscall.Stat_t)
if !ok {
return nil, fmt.Errorf("cannot determine the device number for device %s", path)
}
devNumber := int(stat_t.Rdev)
return &Device{
Type: devType,
Path: path,
MajorNumber: Major(devNumber),
MinorNumber: Minor(devNumber),
CgroupPermissions: cgroupPermissions,
FileMode: fileModePermissionBits,
}, nil
}
func GetHostDeviceNodes() ([]*Device, error) {
return getDeviceNodes("/dev")
}
func getDeviceNodes(path string) ([]*Device, error) {
files, err := ioutil.ReadDir(path)
if err != nil {
return nil, err
}
out := []*Device{}
for _, f := range files {
if f.IsDir() {
switch f.Name() {
case "pts", "shm", "fd":
continue
default:
sub, err := getDeviceNodes(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
out = append(out, sub...)
continue
}
}
device, err := GetDevice(filepath.Join(path, f.Name()), "rwm")
if err != nil {
if err == ErrNotADeviceNode {
continue
}
return nil, err
}
out = append(out, device)
}
return out, nil
}

View file

@ -1,26 +0,0 @@
package devices
/*
This code provides support for manipulating linux device numbers. It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved.
You can read what they are here:
- http://www.makelinux.net/ldd3/chp-3-sect-2
- http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94
Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9
*/
func Major(devNumber int) int64 {
return int64((devNumber >> 8) & 0xfff)
}
func Minor(devNumber int) int64 {
return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
}
func Mkdev(majorNumber int64, minorNumber int64) int {
return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12))
}

View file

@ -1,30 +0,0 @@
// +build !selinux !linux
package label
func GenLabels(options string) (string, string, error) {
return "", "", nil
}
func FormatMountLabel(src string, mountLabel string) string {
return src
}
func SetProcessLabel(processLabel string) error {
return nil
}
func SetFileLabel(path string, fileLabel string) error {
return nil
}
func GetPidCon(pid int) (string, error) {
return "", nil
}
func Init() {
}
func ReserveLabel(label string) error {
return nil
}

View file

@ -1,83 +0,0 @@
// +build selinux,linux
package label
import (
"fmt"
"strings"
"github.com/dotcloud/docker/pkg/libcontainer/selinux"
)
func GenLabels(options string) (string, string, error) {
if !selinux.SelinuxEnabled() {
return "", "", nil
}
var err error
processLabel, mountLabel := selinux.GetLxcContexts()
if processLabel != "" {
var (
s = strings.Fields(options)
l = len(s)
)
if l > 0 {
pcon := selinux.NewContext(processLabel)
for i := 0; i < l; i++ {
o := strings.Split(s[i], "=")
pcon[o[0]] = o[1]
}
processLabel = pcon.Get()
mountLabel, err = selinux.CopyLevel(processLabel, mountLabel)
}
}
return processLabel, mountLabel, err
}
func FormatMountLabel(src, mountLabel string) string {
if mountLabel != "" {
switch src {
case "":
src = fmt.Sprintf("context=%q", mountLabel)
default:
src = fmt.Sprintf("%s,context=%q", src, mountLabel)
}
}
return src
}
func SetProcessLabel(processLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setexeccon(processLabel)
}
return nil
}
func GetProcessLabel() (string, error) {
if selinux.SelinuxEnabled() {
return selinux.Getexeccon()
}
return "", nil
}
func SetFileLabel(path string, fileLabel string) error {
if selinux.SelinuxEnabled() && fileLabel != "" {
return selinux.Setfilecon(path, fileLabel)
}
return nil
}
func GetPidCon(pid int) (string, error) {
if !selinux.SelinuxEnabled() {
return "", nil
}
return selinux.Getpidcon(pid)
}
func Init() {
selinux.SelinuxEnabled()
}
func ReserveLabel(label string) error {
selinux.ReserveLabel(label)
return nil
}

View file

@ -1,201 +0,0 @@
// +build linux
package mount
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/label"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
"github.com/dotcloud/docker/pkg/symlink"
"github.com/dotcloud/docker/pkg/system"
)
// default mount point flags
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
type mount struct {
source string
path string
device string
flags int
data string
}
// InitializeMountNamespace setups up the devices, mount points, and filesystems for use inside a
// new mount namepsace
func InitializeMountNamespace(rootfs, console string, container *libcontainer.Container) error {
var (
err error
flag = syscall.MS_PRIVATE
)
if container.NoPivotRoot {
flag = syscall.MS_SLAVE
}
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err)
}
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
}
if err := mountSystem(rootfs, container); err != nil {
return fmt.Errorf("mount system %s", err)
}
if err := setupBindmounts(rootfs, container.Mounts); err != nil {
return fmt.Errorf("bind mounts %s", err)
}
if err := nodes.CreateDeviceNodes(rootfs, container.DeviceNodes); err != nil {
return fmt.Errorf("create device nodes %s", err)
}
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
return err
}
if err := setupDevSymlinks(rootfs); err != nil {
return fmt.Errorf("dev symlinks %s", err)
}
if err := system.Chdir(rootfs); err != nil {
return fmt.Errorf("chdir into %s %s", rootfs, err)
}
if container.NoPivotRoot {
err = MsMoveRoot(rootfs)
} else {
err = PivotRoot(rootfs)
}
if err != nil {
return err
}
if container.ReadonlyFs {
if err := SetReadonly(); err != nil {
return fmt.Errorf("set readonly %s", err)
}
}
system.Umask(0022)
return nil
}
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
// inside the mount namespace
func mountSystem(rootfs string, container *libcontainer.Container) error {
for _, m := range newSystemMounts(rootfs, container.Context["mount_label"], container.Mounts) {
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("mkdirall %s %s", m.path, err)
}
if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
}
}
return nil
}
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
} else {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
}
return nil
}
func setupDevSymlinks(rootfs string) error {
var links = [][2]string{
{"/proc/self/fd", "/dev/fd"},
{"/proc/self/fd/0", "/dev/stdin"},
{"/proc/self/fd/1", "/dev/stdout"},
{"/proc/self/fd/2", "/dev/stderr"},
}
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
// in /dev if it exists in /proc.
if _, err := os.Stat("/proc/kcore"); err == nil {
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
}
for _, link := range links {
var (
src = link[0]
dst = filepath.Join(rootfs, link[1])
)
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
return fmt.Errorf("symlink %s %s %s", src, dst, err)
}
}
return nil
}
func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
for _, m := range bindMounts.OfType("bind") {
var (
flags = syscall.MS_BIND | syscall.MS_REC
dest = filepath.Join(rootfs, m.Destination)
)
if !m.Writable {
flags = flags | syscall.MS_RDONLY
}
stat, err := os.Stat(m.Source)
if err != nil {
return err
}
dest, err = symlink.FollowSymlinkInScope(dest, rootfs)
if err != nil {
return err
}
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return fmt.Errorf("Creating new bind-mount target, %s", err)
}
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
}
if !m.Writable {
if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
}
}
if m.Private {
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
return fmt.Errorf("mounting %s private %s", dest, err)
}
}
}
return nil
}
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
// standard bind mounts allowing them to be more dynamic
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
systemMounts := []mount{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
}
return systemMounts
}

View file

@ -1,19 +0,0 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func MsMoveRoot(rootfs string) error {
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return fmt.Errorf("mount move %s into / %s", rootfs, err)
}
if err := system.Chroot("."); err != nil {
return fmt.Errorf("chroot . %s", err)
}
return system.Chdir("/")
}

View file

@ -1,53 +0,0 @@
// +build linux
package nodes
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer/devices"
"github.com/dotcloud/docker/pkg/system"
)
// Create the device nodes in the container.
func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
for _, node := range nodesToCreate {
if err := CreateDeviceNode(rootfs, node); err != nil {
return err
}
}
return nil
}
// Creates the device node in the rootfs of the container.
func CreateDeviceNode(rootfs string, node *devices.Device) error {
var (
dest = filepath.Join(rootfs, node.Path)
parent = filepath.Dir(dest)
)
if err := os.MkdirAll(parent, 0755); err != nil {
return err
}
fileMode := node.FileMode
switch node.Type {
case 'c':
fileMode |= syscall.S_IFCHR
case 'b':
fileMode |= syscall.S_IFBLK
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := system.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) {
return fmt.Errorf("mknod %s %s", node.Path, err)
}
return nil
}

View file

@ -1,12 +0,0 @@
// +build !linux
package nodes
import (
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/devices"
)
func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error {
return libcontainer.ErrUnsupported
}

View file

@ -1,31 +0,0 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
func PivotRoot(rootfs string) error {
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err)
}
if err := system.Pivotroot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := system.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}
return os.Remove(pivotDir)
}

View file

@ -1,26 +0,0 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer/console"
"os"
"path/filepath"
)
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if consolePath != "" {
if err := console.Setup(rootfs, consolePath, mountLabel); err != nil {
return err
}
}
return nil
}

View file

@ -1,12 +0,0 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func SetReadonly() error {
return system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}

View file

@ -1,31 +0,0 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func RemountProc() error {
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
return err
}
if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
return err
}
return nil
}
func RemountSys() error {
if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
if err != syscall.EINVAL {
return err
}
} else {
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
return err
}
}
return nil
}

View file

@ -1,10 +0,0 @@
package namespaces
import (
"os"
"os/exec"
"github.com/dotcloud/docker/pkg/libcontainer"
)
type CreateCommand func(container *libcontainer.Container, console, rootfs, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd

View file

@ -1,176 +0,0 @@
// +build linux
package namespaces
import (
"os"
"os/exec"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups/fs"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups/systemd"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/system"
)
// Exec performes setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
func Exec(container *libcontainer.Container, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
var (
master *os.File
console string
err error
)
// create a pipe so that we can syncronize with the namespaced process and
// pass the veth name to the child
syncPipe, err := NewSyncPipe()
if err != nil {
return -1, err
}
if container.Tty {
master, console, err = system.CreateMasterAndConsole()
if err != nil {
return -1, err
}
term.SetMaster(master)
}
command := createCommand(container, console, rootfs, dataPath, os.Args[0], syncPipe.child, args)
if err := term.Attach(command); err != nil {
return -1, err
}
defer term.Close()
if err := command.Start(); err != nil {
return -1, err
}
started, err := system.GetProcessStartTime(command.Process.Pid)
if err != nil {
return -1, err
}
if err := WritePid(dataPath, command.Process.Pid, started); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
defer DeletePid(dataPath)
// Do this before syncing with child so that no children
// can escape the cgroup
cleaner, err := SetupCgroups(container, command.Process.Pid)
if err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
if cleaner != nil {
defer cleaner.Cleanup()
}
if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
// Sync with child
syncPipe.Close()
if startCallback != nil {
startCallback()
}
if err := command.Wait(); err != nil {
if _, ok := err.(*exec.ExitError); !ok {
return -1, err
}
}
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
// DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
// defined on the container's configuration and use the current binary as the init with the
// args provided
//
// console: the /dev/console to setup inside the container
// init: the progam executed inside the namespaces
// root: the path to the container json file and information
// pipe: sync pipe to syncronize the parent and child processes
// args: the arguemnts to pass to the container to run as the user's program
func DefaultCreateCommand(container *libcontainer.Container, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
// get our binary name from arg0 so we can always reexec ourself
env := []string{
"console=" + console,
"pipe=3",
"data_path=" + dataPath,
}
/*
TODO: move user and wd into env
if user != "" {
env = append(env, "user="+user)
}
if workingDir != "" {
env = append(env, "wd="+workingDir)
}
*/
command := exec.Command(init, append([]string{"init"}, args...)...)
// make sure the process is executed inside the context of the rootfs
command.Dir = rootfs
command.Env = append(os.Environ(), env...)
system.SetCloneFlags(command, uintptr(GetNamespaceFlags(container.Namespaces)))
command.SysProcAttr.Pdeathsig = syscall.SIGKILL
command.ExtraFiles = []*os.File{pipe}
return command
}
// SetupCgroups applies the cgroup restrictions to the process running in the contaienr based
// on the container's configuration
func SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) {
if container.Cgroups != nil {
c := container.Cgroups
if systemd.UseSystemd() {
return systemd.Apply(c, nspid)
}
return fs.Apply(c, nspid)
}
return nil, nil
}
// InitializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary
func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {
context := libcontainer.Context{}
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.Create(config, nspid, context); err != nil {
return err
}
}
return pipe.SendToChild(context)
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
for key, enabled := range namespaces {
if enabled {
if ns := libcontainer.GetNamespace(key); ns != nil {
flag |= ns.Value
}
}
}
return flag
}

View file

@ -1,56 +0,0 @@
// +build linux
package namespaces
import (
"encoding/json"
"os"
"strconv"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/label"
"github.com/dotcloud/docker/pkg/system"
)
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
func ExecIn(container *libcontainer.Container, nspid int, args []string) error {
// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
// Marshall the container into JSON since it won't be available in the namespace.
containerJson, err := json.Marshal(container)
if err != nil {
return err
}
// TODO(vmarmol): Move this to the container JSON.
processLabel, err := label.GetPidCon(nspid)
if err != nil {
return err
}
// Enter the namespace and then finish setup
finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)}
finalArgs = append(finalArgs, args...)
if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil {
return err
}
panic("unreachable")
}
// NsEnter is run after entering the namespace.
func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error {
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return err
}
if err := FinalizeNamespace(container); err != nil {
return err
}
if err := label.SetProcessLabel(processLabel); err != nil {
return err
}
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
return err
}
panic("unreachable")
}

View file

@ -1,235 +0,0 @@
// +build linux
package namespaces
import (
"fmt"
"os"
"runtime"
"strings"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/apparmor"
"github.com/dotcloud/docker/pkg/libcontainer/console"
"github.com/dotcloud/docker/pkg/libcontainer/label"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/libcontainer/netlink"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user"
)
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
}
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return err
}
// We always read this as it is a way to sync with the parent as well
context, err := syncPipe.ReadFromParent()
if err != nil {
syncPipe.Close()
return err
}
syncPipe.Close()
if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil {
return err
}
}
if _, err := system.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err)
}
if consolePath != "" {
if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err)
}
}
if err := setupNetwork(container, context); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
return fmt.Errorf("setup route %s", err)
}
label.Init()
if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if container.Hostname != "" {
if err := system.Sethostname(container.Hostname); err != nil {
return fmt.Errorf("sethostname %s", err)
}
}
runtime.LockOSThread()
if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
}
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
return fmt.Errorf("set process label %s", err)
}
if container.Context["restrictions"] != "" {
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
return err
}
}
pdeathSignal, err := system.GetParentDeathSignal()
if err != nil {
return fmt.Errorf("get parent death signal %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
// FinalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
return fmt.Errorf("restore parent death signal %s", err)
}
return system.Execv(args[0], args[0:], container.Env)
}
// RestoreParentDeathSignal sets the parent death signal to old.
func RestoreParentDeathSignal(old int) error {
if old == 0 {
return nil
}
current, err := system.GetParentDeathSignal()
if err != nil {
return fmt.Errorf("get parent death signal %s", err)
}
if old == current {
return nil
}
if err := system.ParentDeathSignal(uintptr(old)); err != nil {
return fmt.Errorf("set parent death signal %s", err)
}
// Signal self if parent is already dead. Does nothing if running in a new
// PID namespace, as Getppid will always return 0.
if syscall.Getppid() == 1 {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
return nil
}
// SetupUser changes the groups, gid, and uid for the user inside the container
func SetupUser(u string) error {
uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid())
if err != nil {
return fmt.Errorf("get supplementary groups %s", err)
}
if err := system.Setgroups(suppGids); err != nil {
return fmt.Errorf("setgroups %s", err)
}
if err := system.Setgid(gid); err != nil {
return fmt.Errorf("setgid %s", err)
}
if err := system.Setuid(uid); err != nil {
return fmt.Errorf("setuid %s", err)
}
return nil
}
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway
func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error {
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
err1 := strategy.Initialize(config, context)
if err1 != nil {
return err1
}
}
return nil
}
func setupRoute(container *libcontainer.Container) error {
for _, config := range container.Routes {
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
return err
}
}
return nil
}
// FinalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaky file descriptors
// before execing the command inside the namespace
func FinalizeNamespace(container *libcontainer.Container) error {
if err := system.CloseFdsFrom(3); err != nil {
return fmt.Errorf("close open file descriptors %s", err)
}
// drop capabilities in bounding set before changing user
if err := capabilities.DropBoundingSet(container); err != nil {
return fmt.Errorf("drop bounding set %s", err)
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return fmt.Errorf("set keep caps %s", err)
}
if err := SetupUser(container.User); err != nil {
return fmt.Errorf("setup user %s", err)
}
if err := system.ClearKeepCaps(); err != nil {
return fmt.Errorf("clear keep caps %s", err)
}
// drop all other capabilities
if err := capabilities.DropCapabilities(container); err != nil {
return fmt.Errorf("drop capabilities %s", err)
}
if container.WorkingDir != "" {
if err := system.Chdir(container.WorkingDir); err != nil {
return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
}
}
return nil
}
func LoadContainerEnvironment(container *libcontainer.Container) error {
os.Clearenv()
for _, pair := range container.Env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment '%v'", pair)
}
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}
return nil
}

View file

@ -1,143 +0,0 @@
package namespaces
/*
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
static const kBufSize = 256;
void get_args(int *argc, char ***argv) {
// Read argv
int fd = open("/proc/self/cmdline", O_RDONLY);
// Read the whole commandline.
ssize_t contents_size = 0;
ssize_t contents_offset = 0;
char *contents = NULL;
ssize_t bytes_read = 0;
do {
contents_size += kBufSize;
contents = (char *) realloc(contents, contents_size);
bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset);
contents_offset += bytes_read;
} while (bytes_read > 0);
close(fd);
// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
ssize_t i;
*argc = 0;
for (i = 0; i < contents_offset; i++) {
if (contents[i] == '\0') {
(*argc)++;
}
}
*argv = (char **) malloc(sizeof(char *) * ((*argc) + 1));
int idx;
for (idx = 0; idx < (*argc); idx++) {
(*argv)[idx] = contents;
contents += strlen(contents) + 1;
}
(*argv)[*argc] = NULL;
}
void nsenter() {
int argc;
char **argv;
get_args(&argc, &argv);
// Ignore if this is not for us.
if (argc < 2 || strcmp(argv[1], "nsenter") != 0) {
return;
}
// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>...
if (argc < 6) {
fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n");
exit(1);
}
pid_t init_pid = strtol(argv[2], NULL, 10);
if (errno != 0 || init_pid <= 0) {
fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno));
exit(1);
}
argc -= 3;
argv += 3;
// Setns on all supported namespaces.
char ns_dir[kBufSize];
memset(ns_dir, 0, kBufSize);
if (snprintf(ns_dir, kBufSize - 1, "/proc/%d/ns/", init_pid) < 0) {
fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno));
exit(1);
}
struct dirent *dent;
DIR *dir = opendir(ns_dir);
if (dir == NULL) {
fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno));
exit(1);
}
while((dent = readdir(dir)) != NULL) {
if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, "user") == 0) {
continue;
}
// Get and open the namespace for the init we are joining..
char buf[kBufSize];
memset(buf, 0, kBufSize);
strncat(buf, ns_dir, kBufSize - 1);
strncat(buf, dent->d_name, kBufSize - 1);
int fd = open(buf, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno));
exit(1);
}
// Set the namespace.
if (setns(fd, 0) == -1) {
fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno));
exit(1);
}
close(fd);
}
closedir(dir);
// We must fork to actually enter the PID namespace.
int child = fork();
if (child == 0) {
// Finish executing, let the Go runtime take over.
return;
} else {
// Parent, wait for the child.
int status = 0;
if (waitpid(child, &status, 0) == -1) {
fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno));
exit(1);
}
// Forward the child's exit code or re-send its death signal.
if (WIFEXITED(status)) {
exit(WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
kill(getpid(), WTERMSIG(status));
}
exit(1);
}
return;
}
__attribute__((constructor)) init() {
nsenter();
}
*/
import "C"

View file

@ -1,28 +0,0 @@
package namespaces
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
)
// WritePid writes the namespaced processes pid to pid and it's start time
// to the path specified
func WritePid(path string, pid int, startTime string) error {
err := ioutil.WriteFile(filepath.Join(path, "pid"), []byte(fmt.Sprint(pid)), 0655)
if err != nil {
return err
}
return ioutil.WriteFile(filepath.Join(path, "start"), []byte(startTime), 0655)
}
// DeletePid removes the pid and started file from disk when the container's process
// dies and the container is cleanly removed
func DeletePid(path string) error {
err := os.Remove(filepath.Join(path, "pid"))
if serr := os.Remove(filepath.Join(path, "start")); err == nil {
err = serr
}
return err
}

View file

@ -1,49 +0,0 @@
package namespaces
import (
"io"
"os"
"os/exec"
)
type StdTerminal struct {
stdin io.Reader
stdout, stderr io.Writer
}
func (s *StdTerminal) SetMaster(*os.File) {
// no need to set master on non tty
}
func (s *StdTerminal) Close() error {
return nil
}
func (s *StdTerminal) Resize(h, w int) error {
return nil
}
func (s *StdTerminal) Attach(command *exec.Cmd) error {
inPipe, err := command.StdinPipe()
if err != nil {
return err
}
outPipe, err := command.StdoutPipe()
if err != nil {
return err
}
errPipe, err := command.StderrPipe()
if err != nil {
return err
}
go func() {
defer inPipe.Close()
io.Copy(inPipe, s.stdin)
}()
go io.Copy(s.stdout, outPipe)
go io.Copy(s.stderr, errPipe)
return nil
}

View file

@ -1,80 +0,0 @@
package namespaces
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"github.com/dotcloud/docker/pkg/libcontainer"
)
// SyncPipe allows communication to and from the child processes
// to it's parent and allows the two independent processes to
// syncronize their state.
type SyncPipe struct {
parent, child *os.File
}
func NewSyncPipe() (s *SyncPipe, err error) {
s = &SyncPipe{}
s.child, s.parent, err = os.Pipe()
if err != nil {
return nil, err
}
return s, nil
}
func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) {
s := &SyncPipe{}
if parendFd > 0 {
s.parent = os.NewFile(parendFd, "parendPipe")
} else if childFd > 0 {
s.child = os.NewFile(childFd, "childPipe")
} else {
return nil, fmt.Errorf("no valid sync pipe fd specified")
}
return s, nil
}
func (s *SyncPipe) Child() *os.File {
return s.child
}
func (s *SyncPipe) Parent() *os.File {
return s.parent
}
func (s *SyncPipe) SendToChild(context libcontainer.Context) error {
data, err := json.Marshal(context)
if err != nil {
return err
}
s.parent.Write(data)
return nil
}
func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) {
data, err := ioutil.ReadAll(s.child)
if err != nil {
return nil, fmt.Errorf("error reading from sync pipe %s", err)
}
var context libcontainer.Context
if len(data) > 0 {
if err := json.Unmarshal(data, &context); err != nil {
return nil, err
}
}
return context, nil
}
func (s *SyncPipe) Close() error {
if s.parent != nil {
s.parent.Close()
}
if s.child != nil {
s.child.Close()
}
return nil
}

View file

@ -1,29 +0,0 @@
package namespaces
import (
"io"
"os"
"os/exec"
)
type Terminal interface {
io.Closer
SetMaster(*os.File)
Attach(*exec.Cmd) error
Resize(h, w int) error
}
func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal {
if tty {
return &TtyTerminal{
stdin: stdin,
stdout: stdout,
stderr: stderr,
}
}
return &StdTerminal{
stdin: stdin,
stdout: stdout,
stderr: stderr,
}
}

View file

@ -1,56 +0,0 @@
package namespaces
import (
"io"
"os"
"os/exec"
"github.com/dotcloud/docker/pkg/term"
)
type TtyTerminal struct {
stdin io.Reader
stdout, stderr io.Writer
master *os.File
state *term.State
}
func (t *TtyTerminal) Resize(h, w int) error {
return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}
func (t *TtyTerminal) SetMaster(master *os.File) {
t.master = master
}
func (t *TtyTerminal) Attach(command *exec.Cmd) error {
go io.Copy(t.stdout, t.master)
go io.Copy(t.master, t.stdin)
state, err := t.setupWindow(t.master, os.Stdin)
if err != nil {
return err
}
t.state = state
return err
}
// SetupWindow gets the parent window size and sets the master
// pty to the current size and set the parents mode to RAW
func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) {
ws, err := term.GetWinsize(parent.Fd())
if err != nil {
return nil, err
}
if err := term.SetWinsize(master.Fd(), ws); err != nil {
return nil, err
}
return term.SetRawTerminal(parent.Fd())
}
func (t *TtyTerminal) Close() error {
term.RestoreTerminal(os.Stdin.Fd(), t.state)
return t.master.Close()
}

View file

@ -1,28 +0,0 @@
// +build !linux
package namespaces
import (
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
)
func Exec(container *libcontainer.Container, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
return -1, libcontainer.ErrUnsupported
}
func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
return libcontainer.ErrUnsupported
}
func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {
return libcontainer.ErrUnsupported
}
func SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) {
return nil, libcontainer.ErrUnsupported
}
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
return 0
}

View file

@ -1,2 +0,0 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
Guillaume J. Charmes <guillaume@docker.com> (@creack)

View file

@ -1,23 +0,0 @@
// Packet netlink provide access to low level Netlink sockets and messages.
//
// Actual implementations are in:
// netlink_linux.go
// netlink_darwin.go
package netlink
import (
"errors"
"net"
)
var (
ErrWrongSockType = errors.New("Wrong socket type")
ErrShortResponse = errors.New("Got short response from netlink")
)
// A Route is a subnet associated with the interface to reach it.
type Route struct {
*net.IPNet
Iface *net.Interface
Default bool
}

View file

@ -1,964 +0,0 @@
// +build amd64
package netlink
import (
"encoding/binary"
"fmt"
"math/rand"
"net"
"sync/atomic"
"syscall"
"unsafe"
)
const (
IFNAMSIZ = 16
DEFAULT_CHANGE = 0xFFFFFFFF
IFLA_INFO_KIND = 1
IFLA_INFO_DATA = 2
VETH_INFO_PEER = 1
IFLA_NET_NS_FD = 28
SIOC_BRADDBR = 0x89a0
SIOC_BRADDIF = 0x89a2
)
var nextSeqNr uint32
type ifreqHwaddr struct {
IfrnName [16]byte
IfruHwaddr syscall.RawSockaddr
}
type ifreqIndex struct {
IfrnName [16]byte
IfruIndex int32
}
func nativeEndian() binary.ByteOrder {
var x uint32 = 0x01020304
if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
return binary.BigEndian
}
return binary.LittleEndian
}
func getIpFamily(ip net.IP) int {
if len(ip) <= net.IPv4len {
return syscall.AF_INET
}
if ip.To4() != nil {
return syscall.AF_INET
}
return syscall.AF_INET6
}
type NetlinkRequestData interface {
Len() int
ToWireFormat() []byte
}
type IfInfomsg struct {
syscall.IfInfomsg
}
func newIfInfomsg(family int) *IfInfomsg {
return &IfInfomsg{
IfInfomsg: syscall.IfInfomsg{
Family: uint8(family),
},
}
}
func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
msg := newIfInfomsg(family)
parent.children = append(parent.children, msg)
return msg
}
func (msg *IfInfomsg) ToWireFormat() []byte {
native := nativeEndian()
length := syscall.SizeofIfInfomsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = 0
native.PutUint16(b[2:4], msg.Type)
native.PutUint32(b[4:8], uint32(msg.Index))
native.PutUint32(b[8:12], msg.Flags)
native.PutUint32(b[12:16], msg.Change)
return b
}
func (msg *IfInfomsg) Len() int {
return syscall.SizeofIfInfomsg
}
type IfAddrmsg struct {
syscall.IfAddrmsg
}
func newIfAddrmsg(family int) *IfAddrmsg {
return &IfAddrmsg{
IfAddrmsg: syscall.IfAddrmsg{
Family: uint8(family),
},
}
}
func (msg *IfAddrmsg) ToWireFormat() []byte {
native := nativeEndian()
length := syscall.SizeofIfAddrmsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = msg.Prefixlen
b[2] = msg.Flags
b[3] = msg.Scope
native.PutUint32(b[4:8], msg.Index)
return b
}
func (msg *IfAddrmsg) Len() int {
return syscall.SizeofIfAddrmsg
}
type RtMsg struct {
syscall.RtMsg
}
func newRtMsg() *RtMsg {
return &RtMsg{
RtMsg: syscall.RtMsg{
Table: syscall.RT_TABLE_MAIN,
Scope: syscall.RT_SCOPE_UNIVERSE,
Protocol: syscall.RTPROT_BOOT,
Type: syscall.RTN_UNICAST,
},
}
}
func (msg *RtMsg) ToWireFormat() []byte {
native := nativeEndian()
length := syscall.SizeofRtMsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = msg.Dst_len
b[2] = msg.Src_len
b[3] = msg.Tos
b[4] = msg.Table
b[5] = msg.Protocol
b[6] = msg.Scope
b[7] = msg.Type
native.PutUint32(b[8:12], msg.Flags)
return b
}
func (msg *RtMsg) Len() int {
return syscall.SizeofRtMsg
}
func rtaAlignOf(attrlen int) int {
return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1)
}
type RtAttr struct {
syscall.RtAttr
Data []byte
children []NetlinkRequestData
}
func newRtAttr(attrType int, data []byte) *RtAttr {
return &RtAttr{
RtAttr: syscall.RtAttr{
Type: uint16(attrType),
},
children: []NetlinkRequestData{},
Data: data,
}
}
func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
attr := newRtAttr(attrType, data)
parent.children = append(parent.children, attr)
return attr
}
func (a *RtAttr) Len() int {
l := 0
for _, child := range a.children {
l += child.Len() + syscall.SizeofRtAttr
}
if l == 0 {
l++
}
return rtaAlignOf(l + len(a.Data))
}
func (a *RtAttr) ToWireFormat() []byte {
native := nativeEndian()
length := a.Len()
buf := make([]byte, rtaAlignOf(length+syscall.SizeofRtAttr))
if a.Data != nil {
copy(buf[4:], a.Data)
} else {
next := 4
for _, child := range a.children {
childBuf := child.ToWireFormat()
copy(buf[next:], childBuf)
next += rtaAlignOf(len(childBuf))
}
}
if l := uint16(rtaAlignOf(length)); l != 0 {
native.PutUint16(buf[0:2], l+1)
}
native.PutUint16(buf[2:4], a.Type)
return buf
}
type NetlinkRequest struct {
syscall.NlMsghdr
Data []NetlinkRequestData
}
func (rr *NetlinkRequest) ToWireFormat() []byte {
native := nativeEndian()
length := rr.Len
dataBytes := make([][]byte, len(rr.Data))
for i, data := range rr.Data {
dataBytes[i] = data.ToWireFormat()
length += uint32(len(dataBytes[i]))
}
b := make([]byte, length)
native.PutUint32(b[0:4], length)
native.PutUint16(b[4:6], rr.Type)
native.PutUint16(b[6:8], rr.Flags)
native.PutUint32(b[8:12], rr.Seq)
native.PutUint32(b[12:16], rr.Pid)
next := 16
for _, data := range dataBytes {
copy(b[next:], data)
next += len(data)
}
return b
}
func (rr *NetlinkRequest) AddData(data NetlinkRequestData) {
if data != nil {
rr.Data = append(rr.Data, data)
}
}
func newNetlinkRequest(proto, flags int) *NetlinkRequest {
return &NetlinkRequest{
NlMsghdr: syscall.NlMsghdr{
Len: uint32(syscall.NLMSG_HDRLEN),
Type: uint16(proto),
Flags: syscall.NLM_F_REQUEST | uint16(flags),
Seq: atomic.AddUint32(&nextSeqNr, 1),
},
}
}
type NetlinkSocket struct {
fd int
lsa syscall.SockaddrNetlink
}
func getNetlinkSocket() (*NetlinkSocket, error) {
fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE)
if err != nil {
return nil, err
}
s := &NetlinkSocket{
fd: fd,
}
s.lsa.Family = syscall.AF_NETLINK
if err := syscall.Bind(fd, &s.lsa); err != nil {
syscall.Close(fd)
return nil, err
}
return s, nil
}
func (s *NetlinkSocket) Close() {
syscall.Close(s.fd)
}
func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil {
return err
}
return nil
}
func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
rb := make([]byte, syscall.Getpagesize())
nr, _, err := syscall.Recvfrom(s.fd, rb, 0)
if err != nil {
return nil, err
}
if nr < syscall.NLMSG_HDRLEN {
return nil, ErrShortResponse
}
rb = rb[:nr]
return syscall.ParseNetlinkMessage(rb)
}
func (s *NetlinkSocket) GetPid() (uint32, error) {
lsa, err := syscall.Getsockname(s.fd)
if err != nil {
return 0, err
}
switch v := lsa.(type) {
case *syscall.SockaddrNetlink:
return v.Pid, nil
}
return 0, ErrWrongSockType
}
func (s *NetlinkSocket) HandleAck(seq uint32) error {
native := nativeEndian()
pid, err := s.GetPid()
if err != nil {
return err
}
done:
for {
msgs, err := s.Receive()
if err != nil {
return err
}
for _, m := range msgs {
if m.Header.Seq != seq {
return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, seq)
}
if m.Header.Pid != pid {
return fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
}
if m.Header.Type == syscall.NLMSG_DONE {
break done
}
if m.Header.Type == syscall.NLMSG_ERROR {
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
break done
}
return syscall.Errno(-error)
}
}
}
return nil
}
// Add a new route table entry.
func AddRoute(destination, source, gateway, device string) error {
if destination == "" && source == "" && gateway == "" {
return fmt.Errorf("one of destination, source or gateway must not be blank")
}
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newRtMsg()
currentFamily := -1
var rtAttrs []*RtAttr
if destination != "" {
destIP, destNet, err := net.ParseCIDR(destination)
if err != nil {
return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
}
destFamily := getIpFamily(destIP)
currentFamily = destFamily
destLen, bits := destNet.Mask.Size()
if destLen == 0 && bits == 0 {
return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
}
msg.Family = uint8(destFamily)
msg.Dst_len = uint8(destLen)
var destData []byte
if destFamily == syscall.AF_INET {
destData = destIP.To4()
} else {
destData = destIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
}
if source != "" {
srcIP, srcNet, err := net.ParseCIDR(source)
if err != nil {
return fmt.Errorf("source CIDR %s couldn't be parsed", source)
}
srcFamily := getIpFamily(srcIP)
if currentFamily != -1 && currentFamily != srcFamily {
return fmt.Errorf("source and destination ip were not the same IP family")
}
currentFamily = srcFamily
srcLen, bits := srcNet.Mask.Size()
if srcLen == 0 && bits == 0 {
return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
}
msg.Family = uint8(srcFamily)
msg.Src_len = uint8(srcLen)
var srcData []byte
if srcFamily == syscall.AF_INET {
srcData = srcIP.To4()
} else {
srcData = srcIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData))
}
if gateway != "" {
gwIP := net.ParseIP(gateway)
if gwIP == nil {
return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
}
gwFamily := getIpFamily(gwIP)
if currentFamily != -1 && currentFamily != gwFamily {
return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
}
msg.Family = uint8(gwFamily)
var gwData []byte
if gwFamily == syscall.AF_INET {
gwData = gwIP.To4()
} else {
gwData = gwIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
}
wb.AddData(msg)
for _, attr := range rtAttrs {
wb.AddData(attr)
}
var (
native = nativeEndian()
b = make([]byte, 4)
)
iface, err := net.InterfaceByName(device)
if err != nil {
return err
}
native.PutUint32(b, uint32(iface.Index))
wb.AddData(newRtAttr(syscall.RTA_OIF, b))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Add a new default gateway. Identical to:
// ip route add default via $ip
func AddDefaultGw(ip, device string) error {
return AddRoute("", "", ip, device)
}
// Bring up a particular network interface
func NetworkLinkUp(iface *net.Interface) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Change = syscall.IFF_UP
msg.Flags = syscall.IFF_UP
msg.Index = int32(iface.Index)
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func NetworkLinkDown(iface *net.Interface) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Change = syscall.IFF_UP
msg.Flags = 0 & ^syscall.IFF_UP
msg.Index = int32(iface.Index)
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func NetworkSetMTU(iface *net.Interface, mtu int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
var (
b = make([]byte, 4)
native = nativeEndian()
)
native.PutUint32(b, uint32(mtu))
data := newRtAttr(syscall.IFLA_MTU, b)
wb.AddData(data)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// same as ip link set $name master $master
func NetworkSetMaster(iface, master *net.Interface) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
var (
b = make([]byte, 4)
native = nativeEndian()
)
native.PutUint32(b, uint32(master.Index))
data := newRtAttr(syscall.IFLA_MASTER, b)
wb.AddData(data)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func NetworkSetNsPid(iface *net.Interface, nspid int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
var (
b = make([]byte, 4)
native = nativeEndian()
)
native.PutUint32(b, uint32(nspid))
data := newRtAttr(syscall.IFLA_NET_NS_PID, b)
wb.AddData(data)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func NetworkSetNsFd(iface *net.Interface, fd int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
var (
b = make([]byte, 4)
native = nativeEndian()
)
native.PutUint32(b, uint32(fd))
data := newRtAttr(IFLA_NET_NS_FD, b)
wb.AddData(data)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Add an Ip address to an interface. This is identical to:
// ip addr add $ip/$ipNet dev $iface
func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
family := getIpFamily(ip)
wb := newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newIfAddrmsg(family)
msg.Index = uint32(iface.Index)
prefixLen, _ := ipNet.Mask.Size()
msg.Prefixlen = uint8(prefixLen)
wb.AddData(msg)
var ipData []byte
if family == syscall.AF_INET {
ipData = ip.To4()
} else {
ipData = ip.To16()
}
localData := newRtAttr(syscall.IFA_LOCAL, ipData)
wb.AddData(localData)
addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
wb.AddData(addrData)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func zeroTerminated(s string) []byte {
return []byte(s + "\000")
}
func nonZeroTerminated(s string) []byte {
return []byte(s)
}
// Add a new network link of a specified type. This is identical to
// running: ip add link $name type $linkType
func NetworkLinkAdd(name string, linkType string) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
if name != "" {
nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
wb.AddData(nameData)
}
kindData := newRtAttr(IFLA_INFO_KIND, nonZeroTerminated(linkType))
infoData := newRtAttr(syscall.IFLA_LINKINFO, kindData.ToWireFormat())
wb.AddData(infoData)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Returns an array of IPNet for all the currently routed subnets on ipv4
// This is similar to the first column of "ip route" output
func NetworkGetRoutes() ([]Route, error) {
native := nativeEndian()
s, err := getNetlinkSocket()
if err != nil {
return nil, err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return nil, err
}
pid, err := s.GetPid()
if err != nil {
return nil, err
}
res := make([]Route, 0)
done:
for {
msgs, err := s.Receive()
if err != nil {
return nil, err
}
for _, m := range msgs {
if m.Header.Seq != wb.Seq {
return nil, fmt.Errorf("Wrong Seq nr %d, expected 1", m.Header.Seq)
}
if m.Header.Pid != pid {
return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
}
if m.Header.Type == syscall.NLMSG_DONE {
break done
}
if m.Header.Type == syscall.NLMSG_ERROR {
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
break done
}
return nil, syscall.Errno(-error)
}
if m.Header.Type != syscall.RTM_NEWROUTE {
continue
}
var r Route
msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0]))
if msg.Flags&syscall.RTM_F_CLONED != 0 {
// Ignore cloned routes
continue
}
if msg.Table != syscall.RT_TABLE_MAIN {
// Ignore non-main tables
continue
}
if msg.Family != syscall.AF_INET {
// Ignore non-ipv4 routes
continue
}
if msg.Dst_len == 0 {
// Default routes
r.Default = true
}
attrs, err := syscall.ParseNetlinkRouteAttr(&m)
if err != nil {
return nil, err
}
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_DST:
ip := attr.Value
r.IPNet = &net.IPNet{
IP: ip,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)),
}
case syscall.RTA_OIF:
index := int(native.Uint32(attr.Value[0:4]))
r.Iface, _ = net.InterfaceByIndex(index)
}
}
if r.Default || r.IPNet != nil {
res = append(res, r)
}
}
}
return res, nil
}
func getIfSocket() (fd int, err error) {
for _, socket := range []int{
syscall.AF_INET,
syscall.AF_PACKET,
syscall.AF_INET6,
} {
if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
break
}
}
if err == nil {
return fd, nil
}
return -1, err
}
func NetworkChangeName(iface *net.Interface, newName string) error {
fd, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(fd)
data := [IFNAMSIZ * 2]byte{}
// the "-1"s here are very important for ensuring we get proper null
// termination of our new C strings
copy(data[:IFNAMSIZ-1], iface.Name)
copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
return errno
}
return nil
}
func NetworkCreateVethPair(name1, name2 string) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
wb.AddData(nameData)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
wb.AddData(nest1)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Create the actual bridge device. This is more backward-compatible than
// netlink.NetworkLinkAdd and works on RHEL 6.
func CreateBridge(name string, setMacAddr bool) error {
s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
if err != nil {
// ipv6 issue, creating with ipv4
s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
if err != nil {
return err
}
}
defer syscall.Close(s)
nameBytePtr, err := syscall.BytePtrFromString(name)
if err != nil {
return err
}
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
return err
}
if setMacAddr {
return setBridgeMacAddress(s, name)
}
return nil
}
// Add a slave to abridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func AddToBridge(iface, master *net.Interface) error {
s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
if err != nil {
// ipv6 issue, creating with ipv4
s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
if err != nil {
return err
}
}
defer syscall.Close(s)
ifr := ifreqIndex{}
copy(ifr.IfrnName[:], master.Name)
ifr.IfruIndex = int32(iface.Index)
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDIF, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
return nil
}
func setBridgeMacAddress(s int, name string) error {
ifr := ifreqHwaddr{}
ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER
copy(ifr.IfrnName[:], name)
for i := 0; i < 6; i++ {
ifr.IfruHwaddr.Data[i] = int8(rand.Intn(255))
}
ifr.IfruHwaddr.Data[0] &^= 0x1 // clear multicast bit
ifr.IfruHwaddr.Data[0] |= 0x2 // set local assignment bit (IEEE802)
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
return nil
}

View file

@ -1,72 +0,0 @@
// +build !linux !amd64
package netlink
import (
"errors"
"net"
)
var (
ErrNotImplemented = errors.New("not implemented")
)
func NetworkGetRoutes() ([]Route, error) {
return nil, ErrNotImplemented
}
func NetworkLinkAdd(name string, linkType string) error {
return ErrNotImplemented
}
func NetworkLinkUp(iface *net.Interface) error {
return ErrNotImplemented
}
func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
return ErrNotImplemented
}
func AddRoute(destination, source, gateway, device string) error {
return ErrNotImplemented
}
func AddDefaultGw(ip, device string) error {
return ErrNotImplemented
}
func NetworkSetMTU(iface *net.Interface, mtu int) error {
return ErrNotImplemented
}
func NetworkCreateVethPair(name1, name2 string) error {
return ErrNotImplemented
}
func NetworkChangeName(iface *net.Interface, newName string) error {
return ErrNotImplemented
}
func NetworkSetNsFd(iface *net.Interface, fd int) error {
return ErrNotImplemented
}
func NetworkSetNsPid(iface *net.Interface, nspid int) error {
return ErrNotImplemented
}
func NetworkSetMaster(iface, master *net.Interface) error {
return ErrNotImplemented
}
func NetworkLinkDown(iface *net.Interface) error {
return ErrNotImplemented
}
func CreateBridge(name string, setMacAddr bool) error {
return ErrNotImplemented
}
func AddToBridge(iface, master *net.Interface) error {
return ErrNotImplemented
}

View file

@ -1,24 +0,0 @@
package network
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
)
// Loopback is a network strategy that provides a basic loopback device
type Loopback struct {
}
func (l *Loopback) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error {
return nil
}
func (l *Loopback) Initialize(config *libcontainer.Network, context libcontainer.Context) error {
if err := SetMtu("lo", config.Mtu); err != nil {
return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
}
if err := InterfaceUp("lo"); err != nil {
return fmt.Errorf("lo up %s", err)
}
return nil
}

View file

@ -1,34 +0,0 @@
package network
import (
"fmt"
"os"
"syscall"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
)
// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
type NetNS struct {
}
func (v *NetNS) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error {
context["nspath"] = n.Context["nspath"]
return nil
}
func (v *NetNS) Initialize(config *libcontainer.Network, context libcontainer.Context) error {
nspath, exists := context["nspath"]
if !exists {
return fmt.Errorf("nspath does not exist in network context")
}
f, err := os.OpenFile(nspath, os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed get network namespace fd: %v", err)
}
if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
return fmt.Errorf("failed to setns current network namespace: %v", err)
}
return nil
}

View file

@ -1,78 +0,0 @@
package network
import (
"github.com/dotcloud/docker/pkg/libcontainer/netlink"
"net"
)
func InterfaceUp(name string) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
return netlink.NetworkLinkUp(iface)
}
func InterfaceDown(name string) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
return netlink.NetworkLinkDown(iface)
}
func ChangeInterfaceName(old, newName string) error {
iface, err := net.InterfaceByName(old)
if err != nil {
return err
}
return netlink.NetworkChangeName(iface, newName)
}
func CreateVethPair(name1, name2 string) error {
return netlink.NetworkCreateVethPair(name1, name2)
}
func SetInterfaceInNamespacePid(name string, nsPid int) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
return netlink.NetworkSetNsPid(iface, nsPid)
}
func SetInterfaceMaster(name, master string) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
masterIface, err := net.InterfaceByName(master)
if err != nil {
return err
}
return netlink.AddToBridge(iface, masterIface)
}
func SetDefaultGateway(ip, ifaceName string) error {
return netlink.AddDefaultGw(ip, ifaceName)
}
func SetInterfaceIp(name string, rawIp string) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
ip, ipNet, err := net.ParseCIDR(rawIp)
if err != nil {
return err
}
return netlink.NetworkLinkAddIp(iface, ip, ipNet)
}
func SetMtu(name string, mtu int) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
return netlink.NetworkSetMTU(iface, mtu)
}

View file

@ -1,35 +0,0 @@
package network
import (
"errors"
"github.com/dotcloud/docker/pkg/libcontainer"
)
var (
ErrNotValidStrategyType = errors.New("not a valid network strategy type")
)
var strategies = map[string]NetworkStrategy{
"veth": &Veth{},
"loopback": &Loopback{},
"netns": &NetNS{},
}
// NetworkStrategy represents a specific network configuration for
// a container's networking stack
type NetworkStrategy interface {
Create(*libcontainer.Network, int, libcontainer.Context) error
Initialize(*libcontainer.Network, libcontainer.Context) error
}
// GetStrategy returns the specific network strategy for the
// provided type. If no strategy is registered for the type an
// ErrNotValidStrategyType is returned.
func GetStrategy(tpe string) (NetworkStrategy, error) {
s, exists := strategies[tpe]
if !exists {
return nil, ErrNotValidStrategyType
}
return s, nil
}

View file

@ -1,96 +0,0 @@
package network
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
)
// Veth is a network strategy that uses a bridge and creates
// a veth pair, one that stays outside on the host and the other
// is placed inside the container's namespace
type Veth struct {
}
const defaultDevice = "eth0"
func (v *Veth) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error {
var (
bridge string
prefix string
exists bool
)
if bridge, exists = n.Context["bridge"]; !exists {
return fmt.Errorf("bridge does not exist in network context")
}
if prefix, exists = n.Context["prefix"]; !exists {
return fmt.Errorf("veth prefix does not exist in network context")
}
name1, name2, err := createVethPair(prefix)
if err != nil {
return err
}
context["veth-host"] = name1
context["veth-child"] = name2
if err := SetInterfaceMaster(name1, bridge); err != nil {
return err
}
if err := SetMtu(name1, n.Mtu); err != nil {
return err
}
if err := InterfaceUp(name1); err != nil {
return err
}
if err := SetInterfaceInNamespacePid(name2, nspid); err != nil {
return err
}
return nil
}
func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error {
var (
vethChild string
exists bool
)
if vethChild, exists = context["veth-child"]; !exists {
return fmt.Errorf("vethChild does not exist in network context")
}
if err := InterfaceDown(vethChild); err != nil {
return fmt.Errorf("interface down %s %s", vethChild, err)
}
if err := ChangeInterfaceName(vethChild, defaultDevice); err != nil {
return fmt.Errorf("change %s to %s %s", vethChild, defaultDevice, err)
}
if err := SetInterfaceIp(defaultDevice, config.Address); err != nil {
return fmt.Errorf("set %s ip %s", defaultDevice, err)
}
if err := SetMtu(defaultDevice, config.Mtu); err != nil {
return fmt.Errorf("set %s mtu to %d %s", defaultDevice, config.Mtu, err)
}
if err := InterfaceUp(defaultDevice); err != nil {
return fmt.Errorf("%s up %s", defaultDevice, err)
}
if config.Gateway != "" {
if err := SetDefaultGateway(config.Gateway, defaultDevice); err != nil {
return fmt.Errorf("set gateway to %s on device %s failed with %s", config.Gateway, defaultDevice, err)
}
}
return nil
}
// createVethPair will automatically generage two random names for
// the veth pair and ensure that they have been created
func createVethPair(prefix string) (name1 string, name2 string, err error) {
name1, err = utils.GenerateRandomName(prefix, 4)
if err != nil {
return
}
name2, err = utils.GenerateRandomName(prefix, 4)
if err != nil {
return
}
if err = CreateVethPair(name1, name2); err != nil {
return
}
return
}

View file

@ -1,76 +0,0 @@
package main
import (
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"github.com/codegangsta/cli"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/namespaces"
)
var execCommand = cli.Command{
Name: "exec",
Usage: "execute a new command inside a container",
Action: execAction,
}
func execAction(context *cli.Context) {
var nspid, exitCode int
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
log.Fatalf("unable to read pid: %s", err)
}
if nspid > 0 {
err = namespaces.ExecIn(container, nspid, []string(context.Args()))
} else {
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
exitCode, err = startContainer(container, term, dataPath, []string(context.Args()))
}
if err != nil {
log.Fatalf("failed to exec: %s", err)
}
os.Exit(exitCode)
}
// startContainer starts the container. Returns the exit status or -1 and an
// error.
//
// Signals sent to the current process will be forwarded to container.
func startContainer(container *libcontainer.Container, term namespaces.Terminal, dataPath string, args []string) (int, error) {
var (
cmd *exec.Cmd
sigc = make(chan os.Signal, 10)
)
signal.Notify(sigc)
createCommand := func(container *libcontainer.Container, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
cmd = namespaces.DefaultCreateCommand(container, console, rootfs, dataPath, init, pipe, args)
if logPath != "" {
cmd.Env = append(cmd.Env, fmt.Sprintf("log=%s", logPath))
}
return cmd
}
startCallback := func() {
go func() {
for sig := range sigc {
cmd.Process.Signal(sig)
}
}()
}
return namespaces.Exec(container, term, "", dataPath, args, createCommand, startCallback)
}

View file

@ -1,48 +0,0 @@
package main
import (
"log"
"os"
"strconv"
"github.com/codegangsta/cli"
"github.com/dotcloud/docker/pkg/libcontainer/namespaces"
)
var (
dataPath = os.Getenv("data_path")
console = os.Getenv("console")
rawPipeFd = os.Getenv("pipe")
initCommand = cli.Command{
Name: "init",
Usage: "runs the init process inside the namespace",
Action: initAction,
}
)
func initAction(context *cli.Context) {
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
rootfs, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
pipeFd, err := strconv.Atoi(rawPipeFd)
if err != nil {
log.Fatal(err)
}
syncPipe, err := namespaces.NewSyncPipeFromFd(0, uintptr(pipeFd))
if err != nil {
log.Fatalf("unable to create sync pipe: %s", err)
}
if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
}
}

View file

@ -1,40 +0,0 @@
package main
import (
"log"
"os"
"github.com/codegangsta/cli"
)
var logPath = os.Getenv("log")
func preload(context *cli.Context) error {
if logPath != "" {
if err := openLog(logPath); err != nil {
return err
}
}
return nil
}
func main() {
app := cli.NewApp()
app.Name = "nsinit"
app.Version = "0.1"
app.Author = "libcontainer maintainers"
app.Before = preload
app.Commands = []cli.Command{
execCommand,
initCommand,
statsCommand,
specCommand,
nsenterCommand,
}
if err := app.Run(os.Args); err != nil {
log.Fatal(err)
}
}

View file

@ -1,40 +0,0 @@
package main
import (
"log"
"strconv"
"github.com/codegangsta/cli"
"github.com/dotcloud/docker/pkg/libcontainer/namespaces"
)
var nsenterCommand = cli.Command{
Name: "nsenter",
Usage: "init process for entering an existing namespace",
Action: nsenterAction,
}
func nsenterAction(context *cli.Context) {
args := context.Args()
if len(args) < 4 {
log.Fatalf("incorrect usage: <pid> <process label> <container JSON> <cmd>...")
}
container, err := loadContainerFromJson(args.Get(2))
if err != nil {
log.Fatalf("unable to load container: %s", err)
}
nspid, err := strconv.Atoi(args.Get(0))
if err != nil {
log.Fatalf("unable to read pid: %s from %q", err, args.Get(0))
}
if nspid <= 0 {
log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid)
}
if err := namespaces.NsEnter(container, args.Get(1), nspid, args[3:]); err != nil {
log.Fatalf("failed to nsenter: %s", err)
}
}

View file

@ -1,40 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"log"
"github.com/codegangsta/cli"
"github.com/dotcloud/docker/pkg/libcontainer"
)
var specCommand = cli.Command{
Name: "spec",
Usage: "display the container specification",
Action: specAction,
}
func specAction(context *cli.Context) {
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
spec, err := getContainerSpec(container)
if err != nil {
log.Fatalf("Failed to get spec - %v\n", err)
}
fmt.Printf("Spec:\n%v\n", spec)
}
// returns the container spec in json format.
func getContainerSpec(container *libcontainer.Container) (string, error) {
spec, err := json.MarshalIndent(container, "", "\t")
if err != nil {
return "", err
}
return string(spec), nil
}

View file

@ -1,46 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"log"
"github.com/codegangsta/cli"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/cgroups/fs"
)
var statsCommand = cli.Command{
Name: "stats",
Usage: "display statistics for the container",
Action: statsAction,
}
func statsAction(context *cli.Context) {
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
stats, err := getContainerStats(container)
if err != nil {
log.Fatalf("Failed to get stats - %v\n", err)
}
fmt.Printf("Stats:\n%v\n", stats)
}
// returns the container stats in json format.
func getContainerStats(container *libcontainer.Container) (string, error) {
stats, err := fs.GetStats(container.Cgroups)
if err != nil {
return "", err
}
out, err := json.MarshalIndent(stats, "", "\t")
if err != nil {
return "", err
}
return string(out), nil
}

View file

@ -1,62 +0,0 @@
package main
import (
"encoding/json"
"io/ioutil"
"log"
"os"
"path/filepath"
"strconv"
"github.com/dotcloud/docker/pkg/libcontainer"
)
func loadContainer() (*libcontainer.Container, error) {
f, err := os.Open(filepath.Join(dataPath, "container.json"))
if err != nil {
return nil, err
}
defer f.Close()
var container *libcontainer.Container
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
return container, nil
}
func readPid() (int, error) {
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
if err != nil {
return -1, err
}
pid, err := strconv.Atoi(string(data))
if err != nil {
return -1, err
}
return pid, nil
}
func openLog(name string) error {
f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755)
if err != nil {
return err
}
log.SetOutput(f)
return nil
}
func loadContainerFromJson(rawData string) (*libcontainer.Container, error) {
var container *libcontainer.Container
if err := json.Unmarshal([]byte(rawData), &container); err != nil {
return nil, err
}
return container, nil
}

View file

@ -1,57 +0,0 @@
package capabilities
import (
"os"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
// DropBoundingSet drops the capability bounding set to those specified in the
// container configuration.
func DropBoundingSet(container *libcontainer.Container) error {
c, err := capability.NewPid(os.Getpid())
if err != nil {
return err
}
keep := getEnabledCapabilities(container)
c.Clear(capability.BOUNDS)
c.Set(capability.BOUNDS, keep...)
if err := c.Apply(capability.BOUNDS); err != nil {
return err
}
return nil
}
// DropCapabilities drops all capabilities for the current process expect those specified in the container configuration.
func DropCapabilities(container *libcontainer.Container) error {
c, err := capability.NewPid(os.Getpid())
if err != nil {
return err
}
keep := getEnabledCapabilities(container)
c.Clear(allCapabilityTypes)
c.Set(allCapabilityTypes, keep...)
if err := c.Apply(allCapabilityTypes); err != nil {
return err
}
return nil
}
// getEnabledCapabilities returns the capabilities that should not be dropped by the container.
func getEnabledCapabilities(container *libcontainer.Container) []capability.Cap {
keep := []capability.Cap{}
for _, capability := range container.Capabilities {
if c := libcontainer.GetCapability(capability); c != nil {
keep = append(keep, c.Value)
}
}
return keep
}

View file

@ -1,52 +0,0 @@
// +build linux
package restrict
import (
"fmt"
"os"
"syscall"
"time"
"github.com/dotcloud/docker/pkg/system"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
func mountReadonly(path string) error {
for i := 0; i < 5; i++ {
if err := system.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
switch err {
case syscall.EINVAL:
// Probably not a mountpoint, use bind-mount
if err := system.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
return err
}
return system.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
case syscall.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
return err
}
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
}
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
func Restrict(mounts ...string) error {
// remount proc and sys as readonly
for _, dest := range mounts {
if err := mountReadonly(dest); err != nil {
return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
}
}
if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err)
}
return nil
}

View file

@ -1,9 +0,0 @@
// +build !linux
package restrict
import "fmt"
func Restrict() error {
return fmt.Errorf("not supported")
}

View file

@ -1,398 +0,0 @@
package selinux
import (
"bufio"
"crypto/rand"
"encoding/binary"
"fmt"
"github.com/dotcloud/docker/pkg/mount"
"github.com/dotcloud/docker/pkg/system"
"io"
"os"
"regexp"
"strconv"
"strings"
"syscall"
)
const (
Enforcing = 1
Permissive = 0
Disabled = -1
selinuxDir = "/etc/selinux/"
selinuxConfig = selinuxDir + "config"
selinuxTypeTag = "SELINUXTYPE"
selinuxTag = "SELINUX"
selinuxPath = "/sys/fs/selinux"
xattrNameSelinux = "security.selinux"
stRdOnly = 0x01
)
var (
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
spaceRegex = regexp.MustCompile(`^([^=]+) (.*)$`)
mcsList = make(map[string]bool)
selinuxfs = "unknown"
selinuxEnabled = false
selinuxEnabledChecked = false
)
type SELinuxContext map[string]string
// SetDisabled disables selinux support for the package
func SetDisabled() {
selinuxEnabled, selinuxEnabledChecked = false, true
}
func getSelinuxMountPoint() string {
if selinuxfs != "unknown" {
return selinuxfs
}
selinuxfs = ""
mounts, err := mount.GetMounts()
if err != nil {
return selinuxfs
}
for _, mount := range mounts {
if mount.Fstype == "selinuxfs" {
selinuxfs = mount.Mountpoint
break
}
}
if selinuxfs != "" {
var buf syscall.Statfs_t
syscall.Statfs(selinuxfs, &buf)
if (buf.Flags & stRdOnly) == 1 {
selinuxfs = ""
}
}
return selinuxfs
}
func SelinuxEnabled() bool {
if selinuxEnabledChecked {
return selinuxEnabled
}
selinuxEnabledChecked = true
if fs := getSelinuxMountPoint(); fs != "" {
if con, _ := getcon(); con != "kernel" {
selinuxEnabled = true
}
}
return selinuxEnabled
}
func readConfig(target string) (value string) {
var (
val, key string
bufin *bufio.Reader
)
in, err := os.Open(selinuxConfig)
if err != nil {
return ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err != io.EOF {
return ""
}
done = true
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == target {
return strings.Trim(val, "\"")
}
}
}
return ""
}
func getSELinuxPolicyRoot() string {
return selinuxDir + readConfig(selinuxTypeTag)
}
func readCon(name string) (string, error) {
var val string
in, err := os.Open(name)
if err != nil {
return "", err
}
defer in.Close()
_, err = fmt.Fscanf(in, "%s", &val)
return val, err
}
func Setfilecon(path string, scon string) error {
return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0)
}
func Setfscreatecon(scon string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", system.Gettid()), scon)
}
func Getfscreatecon() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", system.Gettid()))
}
func getcon() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", system.Gettid()))
}
func Getpidcon(pid int) (string, error) {
return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
}
func Getexeccon() (string, error) {
return readCon("/proc/self/attr/exec")
}
func writeCon(name string, val string) error {
if !SelinuxEnabled() {
return nil
}
out, err := os.OpenFile(name, os.O_WRONLY, 0)
if err != nil {
return err
}
defer out.Close()
if val != "" {
_, err = out.Write([]byte(val))
} else {
_, err = out.Write(nil)
}
return err
}
func Setexeccon(scon string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", system.Gettid()), scon)
}
func (c SELinuxContext) Get() string {
return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
}
func NewContext(scon string) SELinuxContext {
c := make(SELinuxContext)
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
c["user"] = con[0]
c["role"] = con[1]
c["type"] = con[2]
c["level"] = con[3]
}
return c
}
func ReserveLabel(scon string) {
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
mcsAdd(con[3])
}
}
func SelinuxGetEnforce() int {
var enforce int
enforceS, err := readCon(fmt.Sprintf("%s/enforce", selinuxPath))
if err != nil {
return -1
}
enforce, err = strconv.Atoi(string(enforceS))
if err != nil {
return -1
}
return enforce
}
func SelinuxGetEnforceMode() int {
switch readConfig(selinuxTag) {
case "enforcing":
return Enforcing
case "permissive":
return Permissive
}
return Disabled
}
func mcsAdd(mcs string) error {
if mcsList[mcs] {
return fmt.Errorf("MCS Label already exists")
}
mcsList[mcs] = true
return nil
}
func mcsDelete(mcs string) {
mcsList[mcs] = false
}
func mcsExists(mcs string) bool {
return mcsList[mcs]
}
func IntToMcs(id int, catRange uint32) string {
var (
SETSIZE = int(catRange)
TIER = SETSIZE
ORD = id
)
if id < 1 || id > 523776 {
return ""
}
for ORD > TIER {
ORD = ORD - TIER
TIER -= 1
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
}
func uniqMcs(catRange uint32) string {
var (
n uint32
c1, c2 uint32
mcs string
)
for {
binary.Read(rand.Reader, binary.LittleEndian, &n)
c1 = n % catRange
binary.Read(rand.Reader, binary.LittleEndian, &n)
c2 = n % catRange
if c1 == c2 {
continue
} else {
if c1 > c2 {
t := c1
c1 = c2
c2 = t
}
}
mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
if err := mcsAdd(mcs); err != nil {
continue
}
break
}
return mcs
}
func FreeLxcContexts(scon string) {
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
mcsDelete(con[3])
}
}
func GetLxcContexts() (processLabel string, fileLabel string) {
var (
val, key string
bufin *bufio.Reader
)
if !SelinuxEnabled() {
return "", ""
}
lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot())
in, err := os.Open(lxcPath)
if err != nil {
return "", ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err == io.EOF {
done = true
} else {
goto exit
}
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == "process" {
processLabel = strings.Trim(val, "\"")
}
if key == "file" {
fileLabel = strings.Trim(val, "\"")
}
}
}
if processLabel == "" || fileLabel == "" {
return "", ""
}
exit:
// mcs := IntToMcs(os.Getpid(), 1024)
mcs := uniqMcs(1024)
scon := NewContext(processLabel)
scon["level"] = mcs
processLabel = scon.Get()
scon = NewContext(fileLabel)
scon["level"] = mcs
fileLabel = scon.Get()
return processLabel, fileLabel
}
func SecurityCheckContext(val string) error {
return writeCon(fmt.Sprintf("%s.context", selinuxPath), val)
}
func CopyLevel(src, dest string) (string, error) {
if !SelinuxEnabled() {
return "", nil
}
if src == "" {
return "", nil
}
if err := SecurityCheckContext(src); err != nil {
return "", err
}
if err := SecurityCheckContext(dest); err != nil {
return "", err
}
scon := NewContext(src)
tcon := NewContext(dest)
mcsDelete(tcon["level"])
mcsAdd(scon["level"])
tcon["level"] = scon["level"]
return tcon.Get(), nil
}

View file

@ -1,61 +0,0 @@
package selinux_test
import (
"github.com/dotcloud/docker/pkg/libcontainer/selinux"
"os"
"testing"
)
func testSetfilecon(t *testing.T) {
if selinux.SelinuxEnabled() {
tmp := "selinux_test"
out, _ := os.OpenFile(tmp, os.O_WRONLY, 0)
out.Close()
err := selinux.Setfilecon(tmp, "system_u:object_r:bin_t:s0")
if err != nil {
t.Log("Setfilecon failed")
t.Fatal(err)
}
os.Remove(tmp)
}
}
func TestSELinux(t *testing.T) {
var (
err error
plabel, flabel string
)
if selinux.SelinuxEnabled() {
t.Log("Enabled")
plabel, flabel = selinux.GetLxcContexts()
t.Log(plabel)
t.Log(flabel)
selinux.FreeLxcContexts(plabel)
plabel, flabel = selinux.GetLxcContexts()
t.Log(plabel)
t.Log(flabel)
selinux.FreeLxcContexts(plabel)
t.Log("getenforce ", selinux.SelinuxGetEnforce())
t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode())
pid := os.Getpid()
t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0")
if err == nil {
t.Log(selinux.Getfscreatecon())
} else {
t.Log("setfscreatecon failed", err)
t.Fatal(err)
}
err = selinux.Setfscreatecon("")
if err == nil {
t.Log(selinux.Getfscreatecon())
} else {
t.Log("setfscreatecon failed", err)
t.Fatal(err)
}
t.Log(selinux.Getpidcon(1))
} else {
t.Log("Disabled")
}
}

View file

@ -1,165 +0,0 @@
package libcontainer
import (
"errors"
"github.com/syndtr/gocapability/capability"
)
var (
ErrUnkownNamespace = errors.New("Unknown namespace")
ErrUnkownCapability = errors.New("Unknown capability")
ErrUnsupported = errors.New("Unsupported method")
)
type Mounts []Mount
func (s Mounts) OfType(t string) Mounts {
out := Mounts{}
for _, m := range s {
if m.Type == t {
out = append(out, m)
}
}
return out
}
type Mount struct {
Type string `json:"type,omitempty"`
Source string `json:"source,omitempty"` // Source path, in the host namespace
Destination string `json:"destination,omitempty"` // Destination path, in the container
Writable bool `json:"writable,omitempty"`
Private bool `json:"private,omitempty"`
}
// namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for
// each namespace
var (
namespaceList = Namespaces{}
capabilityList = Capabilities{
{Key: "SETPCAP", Value: capability.CAP_SETPCAP},
{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE},
{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO},
{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT},
{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN},
{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE},
{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE},
{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME},
{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG},
{Key: "MKNOD", Value: capability.CAP_MKNOD},
{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE},
{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL},
{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE},
{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN},
{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN},
{Key: "SYSLOG", Value: capability.CAP_SYSLOG},
{Key: "SETUID", Value: capability.CAP_SETUID},
{Key: "SETGID", Value: capability.CAP_SETGID},
{Key: "CHOWN", Value: capability.CAP_CHOWN},
{Key: "NET_RAW", Value: capability.CAP_NET_RAW},
{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE},
{Key: "FOWNER", Value: capability.CAP_FOWNER},
{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH},
{Key: "FSETID", Value: capability.CAP_FSETID},
{Key: "KILL", Value: capability.CAP_KILL},
{Key: "SETGID", Value: capability.CAP_SETGID},
{Key: "SETUID", Value: capability.CAP_SETUID},
{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE},
{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE},
{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST},
{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK},
{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER},
{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT},
{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE},
{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT},
{Key: "LEASE", Value: capability.CAP_LEASE},
{Key: "SETFCAP", Value: capability.CAP_SETFCAP},
{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM},
{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND},
}
)
type (
Namespace struct {
Key string `json:"key,omitempty"`
Value int `json:"value,omitempty"`
File string `json:"file,omitempty"`
}
Namespaces []*Namespace
)
func (ns *Namespace) String() string {
return ns.Key
}
func GetNamespace(key string) *Namespace {
for _, ns := range namespaceList {
if ns.Key == key {
cpy := *ns
return &cpy
}
}
return nil
}
// Contains returns true if the specified Namespace is
// in the slice
func (n Namespaces) Contains(ns string) bool {
return n.Get(ns) != nil
}
func (n Namespaces) Get(ns string) *Namespace {
for _, nsp := range n {
if nsp != nil && nsp.Key == ns {
return nsp
}
}
return nil
}
type (
Capability struct {
Key string `json:"key,omitempty"`
Value capability.Cap `json:"value,omitempty"`
}
Capabilities []*Capability
)
func (c *Capability) String() string {
return c.Key
}
func GetCapability(key string) *Capability {
for _, capp := range capabilityList {
if capp.Key == key {
cpy := *capp
return &cpy
}
}
return nil
}
func GetAllCapabilities() []string {
output := make([]string, len(capabilityList))
for i, capability := range capabilityList {
output[i] = capability.String()
}
return output
}
// Contains returns true if the specified Capability is
// in the slice
func (c Capabilities) Contains(capp string) bool {
return c.Get(capp) != nil
}
func (c Capabilities) Get(capp string) *Capability {
for _, cap := range c {
if cap.Key == capp {
return cap
}
}
return nil
}

View file

@ -1,16 +0,0 @@
package libcontainer
import (
"syscall"
)
func init() {
namespaceList = Namespaces{
{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
}
}

View file

@ -1,44 +0,0 @@
package libcontainer
import (
"testing"
)
func TestNamespacesContains(t *testing.T) {
ns := Namespaces{
GetNamespace("NEWPID"),
GetNamespace("NEWNS"),
GetNamespace("NEWUTS"),
}
if ns.Contains("NEWNET") {
t.Fatal("namespaces should not contain NEWNET")
}
if !ns.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
withNil := Namespaces{
GetNamespace("UNDEFINED"), // this element will be nil
GetNamespace("NEWPID"),
}
if !withNil.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
}
func TestCapabilitiesContains(t *testing.T) {
caps := Capabilities{
GetCapability("MKNOD"),
GetCapability("SETPCAP"),
}
if caps.Contains("SYS_ADMIN") {
t.Fatal("capabilities should not contain SYS_ADMIN")
}
if !caps.Contains("MKNOD") {
t.Fatal("capabilities should contain MKNOD but does not")
}
}

View file

@ -1,28 +0,0 @@
package utils
import (
"crypto/rand"
"encoding/hex"
"io"
"path/filepath"
)
// GenerateRandomName returns a new name joined with a prefix. This size
// specified is used to truncate the randomly generated value
func GenerateRandomName(prefix string, size int) (string, error) {
id := make([]byte, 32)
if _, err := io.ReadFull(rand.Reader, id); err != nil {
return "", err
}
return prefix + hex.EncodeToString(id)[:size], nil
}
// ResolveRootfs ensures that the current working directory is
// not a symlink and returns the absolute path to the rootfs
func ResolveRootfs(uncleanRootfs string) (string, error) {
rootfs, err := filepath.Abs(uncleanRootfs)
if err != nil {
return "", err
}
return filepath.EvalSymlinks(rootfs)
}