Remove and vendor libcontainer
Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
		
							parent
							
								
									c9ed49398e
								
							
						
					
					
						commit
						8a186aa82b
					
				
					 82 changed files with 0 additions and 6936 deletions
				
			
		|  | @ -1,4 +0,0 @@ | |||
| Michael Crosby <michael@crosbymichael.com> (@crosbymichael) | ||||
| Guillaume J. Charmes <guillaume@docker.com> (@creack) | ||||
| Rohit Jnagal <jnagal@google.com> (@rjnagal) | ||||
| Victor Marmol <vmarmol@google.com> (@vmarmol) | ||||
|  | @ -1,37 +0,0 @@ | |||
| ## libcontainer - reference implementation for containers | ||||
| 
 | ||||
| #### background | ||||
| 
 | ||||
| libcontainer specifies configuration options for what a container is.  It provides a native Go implementation  | ||||
| for using Linux namespaces with no external dependencies.  libcontainer provides many convenience functions for working with namespaces, networking, and management.   | ||||
| 
 | ||||
| 
 | ||||
| #### container | ||||
| A container is a self contained directory that is able to run one or more processes without  | ||||
| affecting the host system.  The directory is usually a full system tree.  Inside the directory | ||||
| a `container.json` file is placed with the runtime configuration for how the processes  | ||||
| should be contained and run.  Environment, networking, and different capabilities for the  | ||||
| process are specified in this file.  The configuration is used for each process executed inside the container. | ||||
| 
 | ||||
| See the `container.json` file for what the configuration should look like. | ||||
| 
 | ||||
| Using this configuration and the current directory holding the rootfs for a process, one can use libcontainer to exec the container. During the life of the namespace, a `pid` file  | ||||
| is written to the current directory with the pid of the namespaced process to the external world.  A client can use this pid to wait, kill, or perform other operations on the container.  If a user tries to run a new process inside an existing container with a live namespace, the namespace will be joined by the new process. | ||||
| 
 | ||||
| You may also specify an alternate root place where the `container.json` file is read and where the `pid` file will be saved. | ||||
| 
 | ||||
| #### nsinit | ||||
| 
 | ||||
| `nsinit` is a cli application used as the reference implementation of libcontainer.  It is able to  | ||||
| spawn or join new containers given the current directory.  To use `nsinit` cd into a Linux  | ||||
| rootfs and copy a `container.json` file into the directory with your specified configuration. | ||||
| 
 | ||||
| To execute `/bin/bash` in the current directory as a container just run: | ||||
| ```bash | ||||
| nsinit exec /bin/bash | ||||
| ``` | ||||
| 
 | ||||
| If you wish to spawn another process inside the container while your current bash session is  | ||||
| running just run the exact same command again to get another bash shell or change the command.  If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed.  | ||||
| 
 | ||||
| You can identify if a process is running in a container by looking to see if `pid` is in the root of the directory.    | ||||
|  | @ -1,11 +0,0 @@ | |||
| #### goals | ||||
| * small and simple - line count is not everything but less code is better | ||||
| * provide primitives for working with namespaces not cater to every option | ||||
| * extend via configuration not by features - host networking, no networking, veth network can be accomplished via adjusting the container.json, nothing to do with code | ||||
| 
 | ||||
| #### tasks | ||||
| * reexec or raw syscalls for new process in existing container | ||||
| * example configs for different setups (host networking, boot init) | ||||
| * improve pkg documentation with comments | ||||
| * testing - this is hard in a low level pkg but we could do some, maybe | ||||
| * selinux | ||||
|  | @ -1,35 +0,0 @@ | |||
| // +build apparmor,linux,amd64 | ||||
| 
 | ||||
| package apparmor | ||||
| 
 | ||||
| // #cgo LDFLAGS: -lapparmor | ||||
| // #include <sys/apparmor.h> | ||||
| // #include <stdlib.h> | ||||
| import "C" | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"unsafe" | ||||
| ) | ||||
| 
 | ||||
// IsEnabled reports whether AppArmor can be used by this process. It returns
// true only when /sys/kernel/security/apparmor exists, the "container"
// environment variable is empty, and the module's "enabled" parameter file is
// readable, longer than one byte and starts with 'Y'.
func IsEnabled() bool {
	if _, statErr := os.Stat("/sys/kernel/security/apparmor"); statErr != nil {
		return false
	}
	if os.Getenv("container") != "" {
		return false
	}

	contents, readErr := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
	if readErr != nil {
		return false
	}
	return len(contents) > 1 && contents[0] == 'Y'
}
| 
 | ||||
| func ApplyProfile(name string) error { | ||||
| 	if name == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	cName := C.CString(name) | ||||
| 	defer C.free(unsafe.Pointer(cName)) | ||||
| 
 | ||||
| 	if _, err := C.aa_change_onexec(cName); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,11 +0,0 @@ | |||
| // +build !apparmor !linux !amd64 | ||||
| 
 | ||||
| package apparmor | ||||
| 
 | ||||
// IsEnabled always reports false in builds where AppArmor support is not
// compiled in.
func IsEnabled() bool {
	return false
}
| 
 | ||||
// ApplyProfile is a no-op in builds without AppArmor support; it ignores name
// and always returns nil.
func ApplyProfile(name string) error {
	return nil
}
|  | @ -1,94 +0,0 @@ | |||
| package apparmor | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"text/template" | ||||
| ) | ||||
| 
 | ||||
// data holds the values substituted into baseTemplate.
type data struct {
	// Name is the profile name.
	Name string
	// Imports are emitted before the profile block.
	Imports []string
	// InnerImports are emitted inside the profile block.
	InnerImports []string
}
| 
 | ||||
// baseTemplate is the text/template source of the AppArmor profile. It is
// rendered with a *data value: .Imports are written ahead of the profile
// block, .Name names the profile and .InnerImports are written inside it.
const baseTemplate = `
{{range $value := .Imports}}
{{$value}}
{{end}}

profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
{{range $value := .InnerImports}}
  {{$value}}
{{end}}

  network,
  capability,
  file,
  umount,

  mount fstype=tmpfs,
  mount fstype=mqueue,
  mount fstype=fuse.*,
  mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
  mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
  mount fstype=fusectl -> /sys/fs/fuse/connections/,
  mount fstype=securityfs -> /sys/kernel/security/,
  mount fstype=debugfs -> /sys/kernel/debug/,
  mount fstype=proc -> /proc/,
  mount fstype=sysfs -> /sys/,

  deny @{PROC}/sys/fs/** wklx,
  deny @{PROC}/sysrq-trigger rwklx,
  deny @{PROC}/mem rwklx,
  deny @{PROC}/kmem rwklx,
  deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
  deny @{PROC}/sys/kernel/*/** wklx,

  deny mount options=(ro, remount) -> /,
  deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
  deny mount fstype=devpts,

  deny /sys/[^f]*/** wklx,
  deny /sys/f[^s]*/** wklx,
  deny /sys/fs/[^c]*/** wklx,
  deny /sys/fs/c[^g]*/** wklx,
  deny /sys/fs/cg[^r]*/** wklx,
  deny /sys/firmware/efi/efivars/** rwklx,
  deny /sys/kernel/security/** rwklx,
}
`
| 
 | ||||
| func generateProfile(out io.Writer) error { | ||||
| 	compiled, err := template.New("apparmor_profile").Parse(baseTemplate) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	data := &data{ | ||||
| 		Name: "docker-default", | ||||
| 	} | ||||
| 	if tuntablesExists() { | ||||
| 		data.Imports = append(data.Imports, "#include <tunables/global>") | ||||
| 	} else { | ||||
| 		data.Imports = append(data.Imports, "@{PROC}=/proc/") | ||||
| 	} | ||||
| 	if abstrctionsEsists() { | ||||
| 		data.InnerImports = append(data.InnerImports, "#include <abstractions/base>") | ||||
| 	} | ||||
| 	if err := compiled.Execute(out, data); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
// tuntablesExists reports whether /etc/apparmor.d/tunables/global can be
// stat'ed.
func tuntablesExists() bool {
	if _, err := os.Stat("/etc/apparmor.d/tunables/global"); err != nil {
		return false
	}
	return true
}
| 
 | ||||
// abstrctionsEsists reports whether /etc/apparmor.d/abstractions/base can be
// stat'ed.
func abstrctionsEsists() bool {
	if _, err := os.Stat("/etc/apparmor.d/abstractions/base"); err != nil {
		return false
	}
	return true
}
|  | @ -1,44 +0,0 @@ | |||
| package apparmor | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"path" | ||||
| ) | ||||
| 
 | ||||
// DefaultProfilePath is the file the generated docker AppArmor profile is
// written to.
const DefaultProfilePath = "/etc/apparmor.d/docker"
| 
 | ||||
| func InstallDefaultProfile() error { | ||||
| 	if !IsEnabled() { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	// Make sure /etc/apparmor.d exists | ||||
| 	if err := os.MkdirAll(path.Dir(DefaultProfilePath), 0755); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	f, err := os.OpenFile(DefaultProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := generateProfile(f); err != nil { | ||||
| 		f.Close() | ||||
| 		return err | ||||
| 	} | ||||
| 	f.Close() | ||||
| 
 | ||||
| 	cmd := exec.Command("/sbin/apparmor_parser", "-r", "-W", "docker") | ||||
| 	// to use the parser directly we have to make sure we are in the correct | ||||
| 	// dir with the profile | ||||
| 	cmd.Dir = "/etc/apparmor.d" | ||||
| 
 | ||||
| 	output, err := cmd.CombinedOutput() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("Error loading docker apparmor profile: %s (%s)", err, output) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,40 +0,0 @@ | |||
| package cgroups | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/devices" | ||||
| ) | ||||
| 
 | ||||
// ErrNotFound is the sentinel error for a missing mountpoint.
var ErrNotFound = errors.New("mountpoint not found")
| 
 | ||||
// FreezerState is the string value used for a cgroup's freezer setting.
type FreezerState string

// Known freezer states.
const (
	// Undefined is the zero value: no freezer state was requested.
	Undefined FreezerState = ""
	// Frozen is the "FROZEN" state.
	Frozen FreezerState = "FROZEN"
	// Thawed is the "THAWED" state.
	Thawed FreezerState = "THAWED"
)
| 
 | ||||
| type Cgroup struct { | ||||
| 	Name   string `json:"name,omitempty"` | ||||
| 	Parent string `json:"parent,omitempty"` // name of parent cgroup or slice | ||||
| 
 | ||||
| 	AllowAllDevices   bool              `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container.  If false, allow access only to devices explicitly listed in the allowed_devices list. | ||||
| 	AllowedDevices    []*devices.Device `json:"allowed_devices,omitempty"` | ||||
| 	Memory            int64             `json:"memory,omitempty"`             // Memory limit (in bytes) | ||||
| 	MemoryReservation int64             `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes) | ||||
| 	MemorySwap        int64             `json:"memory_swap,omitempty"`        // Total memory usage (memory + swap); set `-1' to disable swap | ||||
| 	CpuShares         int64             `json:"cpu_shares,omitempty"`         // CPU shares (relative weight vs. other containers) | ||||
| 	CpuQuota          int64             `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period. | ||||
| 	CpuPeriod         int64             `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default. | ||||
| 	CpusetCpus        string            `json:"cpuset_cpus,omitempty"`        // CPU to use | ||||
| 	Freezer           FreezerState      `json:"freezer,omitempty"`            // set the freeze value for the process | ||||
| 	Slice             string            `json:"slice,omitempty"`              // Parent slice to use for systemd | ||||
| } | ||||
| 
 | ||||
// ActiveCgroup is a handle on applied cgroup settings that can be cleaned up
// by the caller.
type ActiveCgroup interface {
	// Cleanup releases whatever the implementation set up.
	Cleanup() error
}
|  | @ -1,27 +0,0 @@ | |||
| package cgroups | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// cgroupsContents is a sample cgroup listing (one "id:subsystems:path" entry
// per line) used as parser input in the tests.
const cgroupsContents = `11:hugetlb:/
10:perf_event:/
9:blkio:/
8:net_cls:/
7:freezer:/
6:devices:/
5:memory:/
4:cpuacct,cpu:/
3:cpuset:/
2:name=systemd:/user.slice/user-1000.slice/session-16.scope`
| 
 | ||||
| func TestParseCgroups(t *testing.T) { | ||||
| 	r := bytes.NewBuffer([]byte(cgroupsContents)) | ||||
| 	_, err := parseCgroupFile("blkio", r) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| } | ||||
|  | @ -1,180 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	subsystems = map[string]subsystem{ | ||||
| 		"devices":    &devicesGroup{}, | ||||
| 		"memory":     &memoryGroup{}, | ||||
| 		"cpu":        &cpuGroup{}, | ||||
| 		"cpuset":     &cpusetGroup{}, | ||||
| 		"cpuacct":    &cpuacctGroup{}, | ||||
| 		"blkio":      &blkioGroup{}, | ||||
| 		"perf_event": &perfEventGroup{}, | ||||
| 		"freezer":    &freezerGroup{}, | ||||
| 	} | ||||
| ) | ||||
| 
 | ||||
| type subsystem interface { | ||||
| 	Set(*data) error | ||||
| 	Remove(*data) error | ||||
| 	GetStats(*data, *cgroups.Stats) error | ||||
| } | ||||
| 
 | ||||
| type data struct { | ||||
| 	root   string | ||||
| 	cgroup string | ||||
| 	c      *cgroups.Cgroup | ||||
| 	pid    int | ||||
| } | ||||
| 
 | ||||
| func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { | ||||
| 	d, err := getCgroupData(c, pid) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	for _, sys := range subsystems { | ||||
| 		if err := sys.Set(d); err != nil { | ||||
| 			d.Cleanup() | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return d, nil | ||||
| } | ||||
| 
 | ||||
| func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { | ||||
| 	stats := cgroups.NewStats() | ||||
| 
 | ||||
| 	d, err := getCgroupData(c, 0) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	for _, sys := range subsystems { | ||||
| 		if err := sys.GetStats(d, stats); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return stats, nil | ||||
| } | ||||
| 
 | ||||
| // Freeze toggles the container's freezer cgroup depending on the state | ||||
| // provided | ||||
| func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { | ||||
| 	d, err := getCgroupData(c, 0) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	c.Freezer = state | ||||
| 
 | ||||
| 	freezer := subsystems["freezer"] | ||||
| 
 | ||||
| 	return freezer.Set(d) | ||||
| } | ||||
| 
 | ||||
| func GetPids(c *cgroups.Cgroup) ([]int, error) { | ||||
| 	d, err := getCgroupData(c, 0) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	dir, err := d.path("devices") | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return cgroups.ReadProcsFile(dir) | ||||
| } | ||||
| 
 | ||||
| func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) { | ||||
| 	// we can pick any subsystem to find the root | ||||
| 	cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	cgroupRoot = filepath.Dir(cgroupRoot) | ||||
| 
 | ||||
| 	if _, err := os.Stat(cgroupRoot); err != nil { | ||||
| 		return nil, fmt.Errorf("cgroups fs not found") | ||||
| 	} | ||||
| 
 | ||||
| 	cgroup := c.Name | ||||
| 	if c.Parent != "" { | ||||
| 		cgroup = filepath.Join(c.Parent, cgroup) | ||||
| 	} | ||||
| 
 | ||||
| 	return &data{ | ||||
| 		root:   cgroupRoot, | ||||
| 		cgroup: cgroup, | ||||
| 		c:      c, | ||||
| 		pid:    pid, | ||||
| 	}, nil | ||||
| } | ||||
| 
 | ||||
| func (raw *data) parent(subsystem string) (string, error) { | ||||
| 	initPath, err := cgroups.GetInitCgroupDir(subsystem) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return filepath.Join(raw.root, subsystem, initPath), nil | ||||
| } | ||||
| 
 | ||||
| func (raw *data) path(subsystem string) (string, error) { | ||||
| 	parent, err := raw.parent(subsystem) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return filepath.Join(parent, raw.cgroup), nil | ||||
| } | ||||
| 
 | ||||
| func (raw *data) join(subsystem string) (string, error) { | ||||
| 	path, err := raw.path(subsystem) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return path, nil | ||||
| } | ||||
| 
 | ||||
| func (raw *data) Cleanup() error { | ||||
| 	for _, sys := range subsystems { | ||||
| 		sys.Remove(raw) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
// writeFile writes data to the file named file inside dir, creating it with
// mode 0700 if it does not exist.
func writeFile(dir, file, data string) error {
	target := filepath.Join(dir, file)
	return ioutil.WriteFile(target, []byte(data), 0700)
}
| 
 | ||||
// readFile reads the file named file inside dir and returns its contents as a
// string together with any read error.
func readFile(dir, file string) (string, error) {
	raw, err := ioutil.ReadFile(filepath.Join(dir, file))
	contents := string(raw)
	return contents, err
}
| 
 | ||||
// removePath is meant to wrap a (path, error) pair: a non-nil err is returned
// unchanged, an empty path is a no-op, and any other path is removed
// recursively.
func removePath(p string, err error) error {
	switch {
	case err != nil:
		return err
	case p == "":
		return nil
	default:
		return os.RemoveAll(p)
	}
}
|  | @ -1,142 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// blkioGroup is the handler registered for the "blkio" subsystem.
type blkioGroup struct{}
| 
 | ||||
| func (s *blkioGroup) Set(d *data) error { | ||||
| 	// we just want to join this group even though we don't set anything | ||||
| 	if _, err := d.join("blkio"); err != nil && err != cgroups.ErrNotFound { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *blkioGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("blkio")) | ||||
| } | ||||
| 
 | ||||
| /* | ||||
| examples: | ||||
| 
 | ||||
|     blkio.sectors | ||||
|     8:0 6792 | ||||
| 
 | ||||
|     blkio.io_service_bytes | ||||
|     8:0 Read 1282048 | ||||
|     8:0 Write 2195456 | ||||
|     8:0 Sync 2195456 | ||||
|     8:0 Async 1282048 | ||||
|     8:0 Total 3477504 | ||||
|     Total 3477504 | ||||
| 
 | ||||
|     blkio.io_serviced | ||||
|     8:0 Read 124 | ||||
|     8:0 Write 104 | ||||
|     8:0 Sync 104 | ||||
|     8:0 Async 124 | ||||
|     8:0 Total 228 | ||||
|     Total 228 | ||||
| 
 | ||||
|     blkio.io_queued | ||||
|     8:0 Read 0 | ||||
|     8:0 Write 0 | ||||
|     8:0 Sync 0 | ||||
|     8:0 Async 0 | ||||
|     8:0 Total 0 | ||||
|     Total 0 | ||||
| */ | ||||
| 
 | ||||
// splitBlkioStatLine is the strings.FieldsFunc separator predicate for blkio
// stat lines: it reports true for a space or a colon.
func splitBlkioStatLine(r rune) bool {
	switch r {
	case ' ', ':':
		return true
	}
	return false
}
| 
 | ||||
| func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { | ||||
| 	var blkioStats []cgroups.BlkioStatEntry | ||||
| 	f, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	sc := bufio.NewScanner(f) | ||||
| 	for sc.Scan() { | ||||
| 		// format: dev type amount | ||||
| 		fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) | ||||
| 		if len(fields) < 3 { | ||||
| 			if len(fields) == 2 && fields[0] == "Total" { | ||||
| 				// skip total line | ||||
| 				continue | ||||
| 			} else { | ||||
| 				return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		v, err := strconv.ParseUint(fields[0], 10, 64) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		major := v | ||||
| 
 | ||||
| 		v, err = strconv.ParseUint(fields[1], 10, 64) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		minor := v | ||||
| 
 | ||||
| 		op := "" | ||||
| 		valueField := 2 | ||||
| 		if len(fields) == 4 { | ||||
| 			op = fields[2] | ||||
| 			valueField = 3 | ||||
| 		} | ||||
| 		v, err = strconv.ParseUint(fields[valueField], 10, 64) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) | ||||
| 	} | ||||
| 
 | ||||
| 	return blkioStats, nil | ||||
| } | ||||
| 
 | ||||
| func (s *blkioGroup) GetStats(d *data, stats *cgroups.Stats) error { | ||||
| 	var blkioStats []cgroups.BlkioStatEntry | ||||
| 	var err error | ||||
| 	path, err := d.path("blkio") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.BlkioStats.SectorsRecursive = blkioStats | ||||
| 
 | ||||
| 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.BlkioStats.IoServiceBytesRecursive = blkioStats | ||||
| 
 | ||||
| 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.BlkioStats.IoServicedRecursive = blkioStats | ||||
| 
 | ||||
| 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.BlkioStats.IoQueuedRecursive = blkioStats | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,174 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// Sample contents for the blkio *_recursive stat files used by the tests.
const (
	// sectorsRecursiveContents is a single "major:minor value" line.
	sectorsRecursiveContents = `8:0 1024`

	// serviceBytesRecursiveContents has per-operation lines plus a grand total.
	serviceBytesRecursiveContents = `8:0 Read 100
8:0 Write 200
8:0 Sync 300
8:0 Async 500
8:0 Total 500
Total 500`

	// servicedRecursiveContents has per-operation lines plus a grand total.
	servicedRecursiveContents = `8:0 Read 10
8:0 Write 40
8:0 Sync 20
8:0 Async 30
8:0 Total 50
Total 50`

	// queuedRecursiveContents has per-operation lines plus a grand total.
	queuedRecursiveContents = `8:0 Read 1
8:0 Write 4
8:0 Sync 2
8:0 Async 3
8:0 Total 5
Total 5`
)
| 
 | ||||
| var actualStats = *cgroups.NewStats() | ||||
| 
 | ||||
| func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { | ||||
| 	*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStats(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, | ||||
| 		"blkio.io_serviced_recursive":      servicedRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":        queuedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":          sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	// Verify expected stats. | ||||
| 	expectedStats := cgroups.BlkioStats{} | ||||
| 	appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "") | ||||
| 
 | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total") | ||||
| 
 | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total") | ||||
| 
 | ||||
| 	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async") | ||||
| 	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total") | ||||
| 
 | ||||
| 	expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsNoSectorsFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, | ||||
| 		"blkio.io_serviced_recursive":      servicedRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":        queuedRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsNoServiceBytesFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_serviced_recursive": servicedRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":   queuedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":     sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsNoServicedFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":        queuedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":          sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsNoQueuedFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, | ||||
| 		"blkio.io_serviced_recursive":      servicedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":          sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": "8:0 Read 100 100", | ||||
| 		"blkio.io_serviced_recursive":      servicedRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":        queuedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":          sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestBlkioStatsUnexpectedFieldType(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("blkio", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"blkio.io_service_bytes_recursive": "8:0 Read Write", | ||||
| 		"blkio.io_serviced_recursive":      servicedRecursiveContents, | ||||
| 		"blkio.io_queued_recursive":        queuedRecursiveContents, | ||||
| 		"blkio.sectors_recursive":          sectorsRecursiveContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	blkio := &blkioGroup{} | ||||
| 	err := blkio.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected to fail, but did not") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,78 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// cpuGroup is the handler registered for the "cpu" subsystem.
type cpuGroup struct{}
| 
 | ||||
| func (s *cpuGroup) Set(d *data) error { | ||||
| 	// We always want to join the cpu group, to allow fair cpu scheduling | ||||
| 	// on a container basis | ||||
| 	dir, err := d.join("cpu") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if d.c.CpuShares != 0 { | ||||
| 		if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if d.c.CpuPeriod != 0 { | ||||
| 		if err := writeFile(dir, "cpu.cfs_period_us", strconv.FormatInt(d.c.CpuPeriod, 10)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if d.c.CpuQuota != 0 { | ||||
| 		if err := writeFile(dir, "cpu.cfs_quota_us", strconv.FormatInt(d.c.CpuQuota, 10)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *cpuGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("cpu")) | ||||
| } | ||||
| 
 | ||||
| func (s *cpuGroup) GetStats(d *data, stats *cgroups.Stats) error { | ||||
| 	path, err := d.path("cpu") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	f, err := os.Open(filepath.Join(path, "cpu.stat")) | ||||
| 	if err != nil { | ||||
| 		if pathErr, ok := err.(*os.PathError); ok && pathErr.Err == syscall.ENOENT { | ||||
| 			return nil | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	sc := bufio.NewScanner(f) | ||||
| 	for sc.Scan() { | ||||
| 		t, v, err := getCgroupParamKeyValue(sc.Text()) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		switch t { | ||||
| 		case "nr_periods": | ||||
| 			stats.CpuStats.ThrottlingData.Periods = v | ||||
| 
 | ||||
| 		case "nr_throttled": | ||||
| 			stats.CpuStats.ThrottlingData.ThrottledPeriods = v | ||||
| 
 | ||||
| 		case "throttled_time": | ||||
| 			stats.CpuStats.ThrottlingData.ThrottledTime = v | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,66 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
| func TestCpuStats(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("cpu", t) | ||||
| 	defer helper.cleanup() | ||||
| 
 | ||||
| 	const ( | ||||
| 		kNrPeriods     = 2000 | ||||
| 		kNrThrottled   = 200 | ||||
| 		kThrottledTime = uint64(18446744073709551615) | ||||
| 	) | ||||
| 
 | ||||
| 	cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n", | ||||
| 		kNrPeriods, kNrThrottled, kThrottledTime) | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"cpu.stat": cpuStatContent, | ||||
| 	}) | ||||
| 
 | ||||
| 	cpu := &cpuGroup{} | ||||
| 	err := cpu.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	expectedStats := cgroups.ThrottlingData{ | ||||
| 		Periods:          kNrPeriods, | ||||
| 		ThrottledPeriods: kNrThrottled, | ||||
| 		ThrottledTime:    kThrottledTime} | ||||
| 
 | ||||
| 	expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData) | ||||
| } | ||||
| 
 | ||||
| func TestNoCpuStatFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("cpu", t) | ||||
| 	defer helper.cleanup() | ||||
| 
 | ||||
| 	cpu := &cpuGroup{} | ||||
| 	err := cpu.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err != nil { | ||||
| 		t.Fatal("Expected not to fail, but did") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestInvalidCpuStat(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("cpu", t) | ||||
| 	defer helper.cleanup() | ||||
| 	cpuStatContent := `nr_periods 2000 | ||||
| 	nr_throttled 200 | ||||
| 	throttled_time fortytwo` | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"cpu.stat": cpuStatContent, | ||||
| 	}) | ||||
| 
 | ||||
| 	cpu := &cpuGroup{} | ||||
| 	err := cpu.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failed stat parsing.") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,162 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"runtime" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
var (
	// cpuCount is runtime.NumCPU() captured once at package initialisation.
	cpuCount = uint64(runtime.NumCPU())
	// clockTicks is the value returned by system.GetClockTicks(), captured
	// once at package initialisation.
	// NOTE(review): presumably clock ticks per second (USER_HZ) — confirm.
	clockTicks = uint64(system.GetClockTicks())
)

// nanosecondsInSecond is used to convert tick counts to nanoseconds.
const nanosecondsInSecond = 1000000000

// cpuacctGroup carries the Set, Remove and GetStats methods for the "cpuacct"
// cgroup subsystem. It has no state of its own.
type cpuacctGroup struct {
}
| 
 | ||||
| func (s *cpuacctGroup) Set(d *data) error { | ||||
| 	// we just want to join this group even though we don't set anything | ||||
| 	if _, err := d.join("cpuacct"); err != nil && err != cgroups.ErrNotFound { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *cpuacctGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("cpuacct")) | ||||
| } | ||||
| 
 | ||||
| func (s *cpuacctGroup) GetStats(d *data, stats *cgroups.Stats) error { | ||||
| 	var ( | ||||
| 		startCpu, lastCpu, startSystem, lastSystem, startUsage, lastUsage, kernelModeUsage, userModeUsage, percentage uint64 | ||||
| 	) | ||||
| 	path, err := d.path("cpuacct") | ||||
| 	if kernelModeUsage, userModeUsage, err = s.getCpuUsage(d, path); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	startCpu = kernelModeUsage + userModeUsage | ||||
| 	if startSystem, err = s.getSystemCpuUsage(d); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	startUsageTime := time.Now() | ||||
| 	if startUsage, err = getCgroupParamInt(path, "cpuacct.usage"); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// sample for 100ms | ||||
| 	time.Sleep(100 * time.Millisecond) | ||||
| 	if kernelModeUsage, userModeUsage, err = s.getCpuUsage(d, path); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	lastCpu = kernelModeUsage + userModeUsage | ||||
| 	if lastSystem, err = s.getSystemCpuUsage(d); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	usageSampleDuration := time.Since(startUsageTime) | ||||
| 	if lastUsage, err = getCgroupParamInt(path, "cpuacct.usage"); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	var ( | ||||
| 		deltaProc   = lastCpu - startCpu | ||||
| 		deltaSystem = lastSystem - startSystem | ||||
| 		deltaUsage  = lastUsage - startUsage | ||||
| 	) | ||||
| 	if deltaSystem > 0.0 { | ||||
| 		percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount | ||||
| 	} | ||||
| 	// NOTE: a percentage over 100% is valid for POSIX because that means the | ||||
| 	// processes is using multiple cores | ||||
| 	stats.CpuStats.CpuUsage.PercentUsage = percentage | ||||
| 	// Delta usage is in nanoseconds of CPU time so get the usage (in cores) over the sample time. | ||||
| 	stats.CpuStats.CpuUsage.CurrentUsage = deltaUsage / uint64(usageSampleDuration.Nanoseconds()) | ||||
| 	percpuUsage, err := s.getPercpuUsage(path) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage | ||||
| 	stats.CpuStats.CpuUsage.UsageInKernelmode = (kernelModeUsage * nanosecondsInSecond) / clockTicks | ||||
| 	stats.CpuStats.CpuUsage.UsageInUsermode = (userModeUsage * nanosecondsInSecond) / clockTicks | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // TODO(vmarmol): Use cgroups stats. | ||||
| func (s *cpuacctGroup) getSystemCpuUsage(d *data) (uint64, error) { | ||||
| 
 | ||||
| 	f, err := os.Open("/proc/stat") | ||||
| 	if err != nil { | ||||
| 		return 0, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	sc := bufio.NewScanner(f) | ||||
| 	for sc.Scan() { | ||||
| 		parts := strings.Fields(sc.Text()) | ||||
| 		switch parts[0] { | ||||
| 		case "cpu": | ||||
| 			if len(parts) < 8 { | ||||
| 				return 0, fmt.Errorf("invalid number of cpu fields") | ||||
| 			} | ||||
| 
 | ||||
| 			var total uint64 | ||||
| 			for _, i := range parts[1:8] { | ||||
| 				v, err := strconv.ParseUint(i, 10, 64) | ||||
| 				if err != nil { | ||||
| 					return 0.0, fmt.Errorf("Unable to convert value %s to int: %s", i, err) | ||||
| 				} | ||||
| 				total += v | ||||
| 			} | ||||
| 			return total, nil | ||||
| 		default: | ||||
| 			continue | ||||
| 		} | ||||
| 	} | ||||
| 	return 0, fmt.Errorf("invalid stat format") | ||||
| } | ||||
| 
 | ||||
| func (s *cpuacctGroup) getCpuUsage(d *data, path string) (uint64, uint64, error) { | ||||
| 	kernelModeUsage := uint64(0) | ||||
| 	userModeUsage := uint64(0) | ||||
| 	data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.stat")) | ||||
| 	if err != nil { | ||||
| 		return 0, 0, err | ||||
| 	} | ||||
| 	fields := strings.Fields(string(data)) | ||||
| 	if len(fields) != 4 { | ||||
| 		return 0, 0, fmt.Errorf("Failure - %s is expected to have 4 fields", filepath.Join(path, "cpuacct.stat")) | ||||
| 	} | ||||
| 	if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { | ||||
| 		return 0, 0, err | ||||
| 	} | ||||
| 	if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { | ||||
| 		return 0, 0, err | ||||
| 	} | ||||
| 
 | ||||
| 	return kernelModeUsage, userModeUsage, nil | ||||
| } | ||||
| 
 | ||||
| func (s *cpuacctGroup) getPercpuUsage(path string) ([]uint64, error) { | ||||
| 	percpuUsage := []uint64{} | ||||
| 	data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) | ||||
| 	if err != nil { | ||||
| 		return percpuUsage, err | ||||
| 	} | ||||
| 	for _, value := range strings.Fields(string(data)) { | ||||
| 		value, err := strconv.ParseUint(value, 10, 64) | ||||
| 		if err != nil { | ||||
| 			return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) | ||||
| 		} | ||||
| 		percpuUsage = append(percpuUsage, value) | ||||
| 	} | ||||
| 	return percpuUsage, nil | ||||
| } | ||||
|  | @ -1,110 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// cpusetGroup carries the Set, Remove and GetStats methods for the "cpuset"
// cgroup subsystem. It has no state of its own.
type cpusetGroup struct {
}
| 
 | ||||
| func (s *cpusetGroup) Set(d *data) error { | ||||
| 	// we don't want to join this cgroup unless it is specified | ||||
| 	if d.c.CpusetCpus != "" { | ||||
| 		dir, err := d.path("cpuset") | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if err := s.ensureParent(dir); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		// because we are not using d.join we need to place the pid into the procs file | ||||
| 		// unlike the other subsystems | ||||
| 		if err := writeFile(dir, "cgroup.procs", strconv.Itoa(d.pid)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *cpusetGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("cpuset")) | ||||
| } | ||||
| 
 | ||||
// GetStats is a no-op for the cpuset subsystem: it leaves stats untouched and
// always returns nil.
func (s *cpusetGroup) GetStats(d *data, stats *cgroups.Stats) error {
	return nil
}
| 
 | ||||
| func (s *cpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { | ||||
| 	if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	return cpus, mems, nil | ||||
| } | ||||
| 
 | ||||
| // ensureParent ensures that the parent directory of current is created | ||||
| // with the proper cpus and mems files copied from it's parent if the values | ||||
| // are a file with a new line char | ||||
| func (s *cpusetGroup) ensureParent(current string) error { | ||||
| 	parent := filepath.Dir(current) | ||||
| 
 | ||||
| 	if _, err := os.Stat(parent); err != nil { | ||||
| 		if !os.IsNotExist(err) { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		if err := s.ensureParent(parent); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) { | ||||
| 		return err | ||||
| 	} | ||||
| 	return s.copyIfNeeded(current, parent) | ||||
| } | ||||
| 
 | ||||
| // copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent | ||||
| // directory to the current directory if the file's contents are 0 | ||||
| func (s *cpusetGroup) copyIfNeeded(current, parent string) error { | ||||
| 	var ( | ||||
| 		err                      error | ||||
| 		currentCpus, currentMems []byte | ||||
| 		parentCpus, parentMems   []byte | ||||
| 	) | ||||
| 
 | ||||
| 	if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if s.isEmpty(currentCpus) { | ||||
| 		if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if s.isEmpty(currentMems) { | ||||
| 		if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *cpusetGroup) isEmpty(b []byte) bool { | ||||
| 	return len(bytes.Trim(b, "\n")) == 0 | ||||
| } | ||||
|  | @ -1,34 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import "github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| 
 | ||||
// devicesGroup carries the Set, Remove and GetStats methods for the "devices"
// cgroup subsystem. It has no state of its own.
type devicesGroup struct {
}
| 
 | ||||
| func (s *devicesGroup) Set(d *data) error { | ||||
| 	dir, err := d.join("devices") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if !d.c.AllowAllDevices { | ||||
| 		if err := writeFile(dir, "devices.deny", "a"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		for _, dev := range d.c.AllowedDevices { | ||||
| 			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *devicesGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("devices")) | ||||
| } | ||||
| 
 | ||||
// GetStats is a no-op for the devices subsystem: it leaves stats untouched and
// always returns nil.
func (s *devicesGroup) GetStats(d *data, stats *cgroups.Stats) error {
	return nil
}
|  | @ -1,71 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"path/filepath" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// freezerGroup carries the Set, Remove and GetStats methods for the "freezer"
// cgroup subsystem. It has no state of its own.
type freezerGroup struct {
}
| 
 | ||||
| func (s *freezerGroup) Set(d *data) error { | ||||
| 	switch d.c.Freezer { | ||||
| 	case cgroups.Frozen, cgroups.Thawed: | ||||
| 		dir, err := d.path("freezer") | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		if err := writeFile(dir, "freezer.state", string(d.c.Freezer)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		for { | ||||
| 			state, err := readFile(dir, "freezer.state") | ||||
| 			if err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			if strings.TrimSpace(state) == string(d.c.Freezer) { | ||||
| 				break | ||||
| 			} | ||||
| 			time.Sleep(1 * time.Millisecond) | ||||
| 		} | ||||
| 	default: | ||||
| 		if _, err := d.join("freezer"); err != nil && err != cgroups.ErrNotFound { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *freezerGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("freezer")) | ||||
| } | ||||
| 
 | ||||
// getFreezerFileData returns the contents of the file at path with at most
// one trailing newline removed, along with any error from reading the file.
func getFreezerFileData(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	contents := string(raw)
	if strings.HasSuffix(contents, "\n") {
		contents = contents[:len(contents)-1]
	}
	return contents, err
}
| 
 | ||||
| func (s *freezerGroup) GetStats(d *data, stats *cgroups.Stats) error { | ||||
| 	path, err := d.path("freezer") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	var data string | ||||
| 	if data, err = getFreezerFileData(filepath.Join(path, "freezer.parent_freezing")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.FreezerStats.ParentState = data | ||||
| 	if data, err = getFreezerFileData(filepath.Join(path, "freezer.self_freezing")); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.FreezerStats.SelfState = data | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,94 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// memoryGroup carries the Set, Remove and GetStats methods for the "memory"
// cgroup subsystem. It has no state of its own.
type memoryGroup struct {
}
| 
 | ||||
| func (s *memoryGroup) Set(d *data) error { | ||||
| 	dir, err := d.join("memory") | ||||
| 	// only return an error for memory if it was not specified | ||||
| 	if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		if err != nil { | ||||
| 			os.RemoveAll(dir) | ||||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	// Only set values if some config was specified. | ||||
| 	if d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0 { | ||||
| 		if d.c.Memory != 0 { | ||||
| 			if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if d.c.MemoryReservation != 0 { | ||||
| 			if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.MemoryReservation, 10)); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		// By default, MemorySwap is set to twice the size of RAM. | ||||
| 		// If you want to omit MemorySwap, set it to `-1'. | ||||
| 		if d.c.MemorySwap != -1 { | ||||
| 			if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *memoryGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("memory")) | ||||
| } | ||||
| 
 | ||||
| func (s *memoryGroup) GetStats(d *data, stats *cgroups.Stats) error { | ||||
| 	path, err := d.path("memory") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// Set stats from memory.stat. | ||||
| 	statsFile, err := os.Open(filepath.Join(path, "memory.stat")) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer statsFile.Close() | ||||
| 
 | ||||
| 	sc := bufio.NewScanner(statsFile) | ||||
| 	for sc.Scan() { | ||||
| 		t, v, err := getCgroupParamKeyValue(sc.Text()) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		stats.MemoryStats.Stats[t] = v | ||||
| 	} | ||||
| 
 | ||||
| 	// Set memory usage and max historical usage. | ||||
| 	value, err := getCgroupParamInt(path, "memory.usage_in_bytes") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.MemoryStats.Usage = value | ||||
| 	value, err = getCgroupParamInt(path, "memory.max_usage_in_bytes") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.MemoryStats.MaxUsage = value | ||||
| 	value, err = getCgroupParamInt(path, "memory.failcnt") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	stats.MemoryStats.Failcnt = value | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,127 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// Fixture file contents shared by the memory stats tests below.
const (
	// memoryStatContents is a two-line memory.stat body.
	memoryStatContents = `cache 512
rss 1024`
	// The remaining fixtures are single values followed by a newline.
	memoryUsageContents    = "2048\n"
	memoryMaxUsageContents = "4096\n"
	memoryFailcnt          = "100\n"
)
| 
 | ||||
| func TestMemoryStats(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":               memoryStatContents, | ||||
| 		"memory.usage_in_bytes":     memoryUsageContents, | ||||
| 		"memory.max_usage_in_bytes": memoryMaxUsageContents, | ||||
| 		"memory.failcnt":            memoryFailcnt, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	expectedStats := cgroups.MemoryStats{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Stats: map[string]uint64{"cache": 512, "rss": 1024}} | ||||
| 	expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats) | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsNoStatFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.usage_in_bytes":     memoryUsageContents, | ||||
| 		"memory.max_usage_in_bytes": memoryMaxUsageContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsNoUsageFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":               memoryStatContents, | ||||
| 		"memory.max_usage_in_bytes": memoryMaxUsageContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsNoMaxUsageFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":           memoryStatContents, | ||||
| 		"memory.usage_in_bytes": memoryUsageContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsBadStatFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":               "rss rss", | ||||
| 		"memory.usage_in_bytes":     memoryUsageContents, | ||||
| 		"memory.max_usage_in_bytes": memoryMaxUsageContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsBadUsageFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":               memoryStatContents, | ||||
| 		"memory.usage_in_bytes":     "bad", | ||||
| 		"memory.max_usage_in_bytes": memoryMaxUsageContents, | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemoryStatsBadMaxUsageFile(t *testing.T) { | ||||
| 	helper := NewCgroupTestUtil("memory", t) | ||||
| 	defer helper.cleanup() | ||||
| 	helper.writeFileContents(map[string]string{ | ||||
| 		"memory.stat":               memoryStatContents, | ||||
| 		"memory.usage_in_bytes":     memoryUsageContents, | ||||
| 		"memory.max_usage_in_bytes": "bad", | ||||
| 	}) | ||||
| 
 | ||||
| 	memory := &memoryGroup{} | ||||
| 	err := memory.GetStats(helper.CgroupData, &actualStats) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expected failure") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,24 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// perfEventGroup carries the Set, Remove and GetStats methods for the
// "perf_event" cgroup subsystem. It has no state of its own.
type perfEventGroup struct {
}
| 
 | ||||
| func (s *perfEventGroup) Set(d *data) error { | ||||
| 	// we just want to join this group even though we don't set anything | ||||
| 	if _, err := d.join("perf_event"); err != nil && err != cgroups.ErrNotFound { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *perfEventGroup) Remove(d *data) error { | ||||
| 	return removePath(d.path("perf_event")) | ||||
| } | ||||
| 
 | ||||
// GetStats is a no-op for the perf_event subsystem: it leaves stats untouched
// and always returns nil.
func (s *perfEventGroup) GetStats(d *data, stats *cgroups.Stats) error {
	return nil
}
|  | @ -1,73 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"log" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
| func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error { | ||||
| 	if len(expected) != len(actual) { | ||||
| 		return fmt.Errorf("blkioStatEntries length do not match") | ||||
| 	} | ||||
| 	for i, expValue := range expected { | ||||
| 		actValue := actual[i] | ||||
| 		if expValue != actValue { | ||||
| 			return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { | ||||
| 	if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil { | ||||
| 		log.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil { | ||||
| 		log.Printf("blkio IoServicedRecursive do not match - %s\n", err) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil { | ||||
| 		log.Printf("blkio IoQueuedRecursive do not match - %s\n", err) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil { | ||||
| 		log.Printf("blkio SectorsRecursive do not match - %s\n", err) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) { | ||||
| 	if expected != actual { | ||||
| 		log.Printf("Expected throttling data %v but found %v\n", expected, actual) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) { | ||||
| 	if expected.Usage != actual.Usage { | ||||
| 		log.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 	if expected.MaxUsage != actual.MaxUsage { | ||||
| 		log.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage) | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 	for key, expValue := range expected.Stats { | ||||
| 		actValue, ok := actual.Stats[key] | ||||
| 		if !ok { | ||||
| 			log.Printf("Expected memory stat key %s not found\n", key) | ||||
| 			t.Fail() | ||||
| 		} | ||||
| 		if expValue != actValue { | ||||
| 			log.Printf("Expected memory stat value %d but found %d\n", expValue, actValue) | ||||
| 			t.Fail() | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,60 +0,0 @@ | |||
| /* | ||||
| Utility for testing cgroup operations. | ||||
| 
 | ||||
| Creates a mock of the cgroup filesystem for the duration of the test. | ||||
| */ | ||||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// cgroupTestUtil bundles a mock cgroup directory with the data value that
// points at it, for use by the subsystem tests.
type cgroupTestUtil struct {
	// data to use in tests.
	CgroupData *data

	// Path to the mock cgroup directory.
	CgroupPath string

	// Temporary directory to store mock cgroup filesystem.
	tempDir string
	// t is the test that owns this helper; used to abort on setup errors.
	t *testing.T
}
| 
 | ||||
| // Creates a new test util for the specified subsystem | ||||
| func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { | ||||
| 	d := &data{} | ||||
| 	tempDir, err := ioutil.TempDir("", fmt.Sprintf("%s_cgroup_test", subsystem)) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	d.root = tempDir | ||||
| 	testCgroupPath, err := d.path(subsystem) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	// Ensure the full mock cgroup path exists. | ||||
| 	err = os.MkdirAll(testCgroupPath, 0755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} | ||||
| } | ||||
| 
 | ||||
// cleanup removes the temporary directory backing the mock cgroup
// filesystem. The error from os.RemoveAll is ignored.
func (c *cgroupTestUtil) cleanup() {
	os.RemoveAll(c.tempDir)
}
| 
 | ||||
| // Write the specified contents on the mock of the specified cgroup files. | ||||
| func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { | ||||
| 	for file, contents := range fileContents { | ||||
| 		err := writeFile(c.CgroupPath, file, contents) | ||||
| 		if err != nil { | ||||
| 			c.t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,40 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| ) | ||||
| 
 | ||||
var (
	// ErrNotSupportStat is the sentinel error for subsystems without stats.
	ErrNotSupportStat = errors.New("stats are not supported for subsystem")
	// ErrNotValidFormat is returned by getCgroupParamKeyValue when a line
	// does not consist of exactly two whitespace-separated fields.
	ErrNotValidFormat = errors.New("line is not a valid key value format")
)
| 
 | ||||
| // Parses a cgroup param and returns as name, value | ||||
| //  i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 | ||||
| func getCgroupParamKeyValue(t string) (string, uint64, error) { | ||||
| 	parts := strings.Fields(t) | ||||
| 	switch len(parts) { | ||||
| 	case 2: | ||||
| 		value, err := strconv.ParseUint(parts[1], 10, 64) | ||||
| 		if err != nil { | ||||
| 			return "", 0, fmt.Errorf("Unable to convert param value to uint64: %s", err) | ||||
| 		} | ||||
| 		return parts[0], value, nil | ||||
| 	default: | ||||
| 		return "", 0, ErrNotValidFormat | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// getCgroupParamInt reads cgroupFile inside cgroupPath, trims surrounding
// whitespace, and parses the result as a base-10 uint64. Read and parse
// errors are returned unchanged.
func getCgroupParamInt(cgroupPath, cgroupFile string) (uint64, error) {
	raw, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
	if err != nil {
		return 0, err
	}
	trimmed := strings.TrimSpace(string(raw))
	return strconv.ParseUint(trimmed, 10, 64)
}
|  | @ -1,68 +0,0 @@ | |||
| package fs | ||||
| 
 | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// Fixtures for TestGetCgroupParamsInt.
const (
	// cgroupFile is the name of the file created inside the temp directory.
	cgroupFile = "cgroup.file"
	// floatValue is the number expected back from getCgroupParamInt;
	// floatString is its textual form written to the file.
	floatValue  = 2048.0
	floatString = "2048"
)
| 
 | ||||
| func TestGetCgroupParamsInt(t *testing.T) { | ||||
| 	// Setup tempdir. | ||||
| 	tempDir, err := ioutil.TempDir("", "cgroup_utils_test") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	defer os.RemoveAll(tempDir) | ||||
| 	tempFile := filepath.Join(tempDir, cgroupFile) | ||||
| 
 | ||||
| 	// Success. | ||||
| 	err = ioutil.WriteFile(tempFile, []byte(floatString), 0755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	value, err := getCgroupParamInt(tempDir, cgroupFile) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} else if value != floatValue { | ||||
| 		t.Fatalf("Expected %f to equal %f", value, floatValue) | ||||
| 	} | ||||
| 
 | ||||
| 	// Success with new line. | ||||
| 	err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	value, err = getCgroupParamInt(tempDir, cgroupFile) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} else if value != floatValue { | ||||
| 		t.Fatalf("Expected %f to equal %f", value, floatValue) | ||||
| 	} | ||||
| 
 | ||||
| 	// Not a float. | ||||
| 	err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	_, err = getCgroupParamInt(tempDir, cgroupFile) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expecting error, got none") | ||||
| 	} | ||||
| 
 | ||||
| 	// Unknown file. | ||||
| 	err = os.Remove(tempFile) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	_, err = getCgroupParamInt(tempDir, cgroupFile) | ||||
| 	if err == nil { | ||||
| 		t.Fatal("Expecting error, got none") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,72 +0,0 @@ | |||
| package cgroups | ||||
| 
 | ||||
// ThrottlingData holds the CPU throttling counters of a container.
type ThrottlingData struct {
	// Number of periods with throttling active
	Periods uint64 `json:"periods,omitempty"`
	// Number of periods when the container hit its throttling limit.
	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
	// Aggregate time the container was throttled for in nanoseconds.
	ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
| 
 | ||||
// CpuUsage carries the CPU consumption figures gathered for a cgroup.
type CpuUsage struct {
	// PercentUsage is the percentage of the available CPUs currently in use.
	PercentUsage uint64 `json:"percent_usage,omitempty"`
	// CurrentUsage is the CPU time, in nanoseconds, consumed over the last
	// 100 ms.
	CurrentUsage uint64 `json:"current_usage,omitempty"`
	// PercpuUsage is presumably the usage broken down per CPU; the unit is
	// not stated in this file.
	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
	// UsageInKernelmode is the time, in nanoseconds, that tasks of the cgroup
	// spent in kernel mode. It is always serialized, even when zero.
	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
	// UsageInUsermode is the time, in nanoseconds, that tasks of the cgroup
	// spent in user mode. It is always serialized, even when zero.
	UsageInUsermode uint64 `json:"usage_in_usermode"`
}
| 
 | ||||
| type CpuStats struct { | ||||
| 	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"` | ||||
| 	ThrottlingData ThrottlingData `json:"throlling_data,omitempty"` | ||||
| } | ||||
| 
 | ||||
// MemoryStats carries the memory figures gathered for a cgroup.
type MemoryStats struct {
	// Usage is the current res_counter usage for memory.
	Usage uint64 `json:"usage,omitempty"`
	// MaxUsage is the highest usage ever recorded.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// Stats holds every value exported via memory.stat, keyed by name.
	// TODO(vishh): Export these as stronger types.
	Stats map[string]uint64 `json:"stats,omitempty"`
	// Failcnt is the number of times memory usage hit its limit. It is always
	// serialized, even when zero.
	Failcnt uint64 `json:"failcnt"`
}
| 
 | ||||
// BlkioStatEntry is a single block I/O statistic: a value recorded for one
// operation on the device identified by Major and Minor.
type BlkioStatEntry struct {
	// Major is the device major number.
	Major uint64 `json:"major,omitempty"`
	// Minor is the device minor number.
	Minor uint64 `json:"minor,omitempty"`
	// Op names the operation the value belongs to.
	Op string `json:"op,omitempty"`
	// Value is the recorded figure for that device and operation.
	Value uint64 `json:"value,omitempty"`
}
| 
 | ||||
| type BlkioStats struct { | ||||
| 	// number of bytes tranferred to and from the block device | ||||
| 	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` | ||||
| 	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recusrive,omitempty"` | ||||
| 	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"` | ||||
| 	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"` | ||||
| } | ||||
| 
 | ||||
// FreezerStats reports the freezer state of a cgroup and of its parent.
//
// TODO(Vishh): Remove freezer from stats since it does not logically belong in stats.
type FreezerStats struct {
	// ParentState is the freezer state of the parent.
	ParentState string `json:"parent_state,omitempty"`
	// SelfState is the freezer state of the cgroup itself.
	SelfState string `json:"self_state,omitempty"`
}
| 
 | ||||
| type Stats struct { | ||||
| 	CpuStats     CpuStats     `json:"cpu_stats,omitempty"` | ||||
| 	MemoryStats  MemoryStats  `json:"memory_stats,omitempty"` | ||||
| 	BlkioStats   BlkioStats   `json:"blkio_stats,omitempty"` | ||||
| 	FreezerStats FreezerStats `json:"freezer_stats,omitempty"` | ||||
| } | ||||
| 
 | ||||
| func NewStats() *Stats { | ||||
| 	memoryStats := MemoryStats{Stats: make(map[string]uint64)} | ||||
| 	return &Stats{MemoryStats: memoryStats} | ||||
| } | ||||
|  | @ -1,25 +0,0 @@ | |||
| // +build !linux | ||||
| 
 | ||||
| package systemd | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// UseSystemd reports whether cgroups should be managed through systemd.
// This is the non-Linux build of the function and always answers false.
func UseSystemd() bool {
	const supported = false
	return supported
}
| 
 | ||||
| func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { | ||||
| 	return nil, fmt.Errorf("Systemd not supported") | ||||
| } | ||||
| 
 | ||||
| func GetPids(c *cgroups.Cgroup) ([]int, error) { | ||||
| 	return nil, fmt.Errorf("Systemd not supported") | ||||
| } | ||||
| 
 | ||||
| func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { | ||||
| 	return fmt.Errorf("Systemd not supported") | ||||
| } | ||||
|  | @ -1,391 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package systemd | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"time" | ||||
| 
 | ||||
| 	systemd1 "github.com/coreos/go-systemd/dbus" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| 	"github.com/dotcloud/docker/pkg/systemd" | ||||
| 	"github.com/godbus/dbus" | ||||
| ) | ||||
| 
 | ||||
// systemdCgroup is the handle returned by Apply. It remembers the cgroup
// directories that Cleanup has to remove.
type systemdCgroup struct {
	// cleanupDirs lists the directories deleted by Cleanup.
	cleanupDirs []string
}
| 
 | ||||
| var ( | ||||
| 	connLock              sync.Mutex | ||||
| 	theConn               *systemd1.Conn | ||||
| 	hasStartTransientUnit bool | ||||
| ) | ||||
| 
 | ||||
| func UseSystemd() bool { | ||||
| 	if !systemd.SdBooted() { | ||||
| 		return false | ||||
| 	} | ||||
| 
 | ||||
| 	connLock.Lock() | ||||
| 	defer connLock.Unlock() | ||||
| 
 | ||||
| 	if theConn == nil { | ||||
| 		var err error | ||||
| 		theConn, err = systemd1.New() | ||||
| 		if err != nil { | ||||
| 			return false | ||||
| 		} | ||||
| 
 | ||||
| 		// Assume we have StartTransientUnit | ||||
| 		hasStartTransientUnit = true | ||||
| 
 | ||||
| 		// But if we get UnknownMethod error we don't | ||||
| 		if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil { | ||||
| 			if dbusError, ok := err.(dbus.Error); ok { | ||||
| 				if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { | ||||
| 					hasStartTransientUnit = false | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return hasStartTransientUnit | ||||
| } | ||||
| 
 | ||||
// getIfaceForUnit maps a unit name to the interface name used when querying
// its type-specific properties: "Scope" for *.scope units, "Service" for
// *.service units and "Unit" for everything else.
func getIfaceForUnit(unitName string) string {
	switch {
	case strings.HasSuffix(unitName, ".scope"):
		return "Scope"
	case strings.HasSuffix(unitName, ".service"):
		return "Service"
	default:
		return "Unit"
	}
}
| 
 | ||||
// cgroupArg pairs a cgroup control file with the value to write into it.
// Values of this type are built positionally, so the field order matters.
type cgroupArg struct {
	// File is the control file name, for example "cpuset.cpus".
	File string
	// Value is the text written to File.
	Value string
}
| 
 | ||||
| func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { | ||||
| 	var ( | ||||
| 		unitName   = getUnitName(c) | ||||
| 		slice      = "system.slice" | ||||
| 		properties []systemd1.Property | ||||
| 		cpuArgs    []cgroupArg | ||||
| 		cpusetArgs []cgroupArg | ||||
| 		memoryArgs []cgroupArg | ||||
| 		res        systemdCgroup | ||||
| 	) | ||||
| 
 | ||||
| 	// First set up things not supported by systemd | ||||
| 
 | ||||
| 	// -1 disables memorySwap | ||||
| 	if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { | ||||
| 		memorySwap := c.MemorySwap | ||||
| 
 | ||||
| 		if memorySwap == 0 { | ||||
| 			// By default, MemorySwap is set to twice the size of RAM. | ||||
| 			memorySwap = c.Memory * 2 | ||||
| 		} | ||||
| 
 | ||||
| 		memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)}) | ||||
| 	} | ||||
| 
 | ||||
| 	if c.CpusetCpus != "" { | ||||
| 		cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus}) | ||||
| 	} | ||||
| 
 | ||||
| 	if c.Slice != "" { | ||||
| 		slice = c.Slice | ||||
| 	} | ||||
| 
 | ||||
| 	properties = append(properties, | ||||
| 		systemd1.Property{"Slice", dbus.MakeVariant(slice)}, | ||||
| 		systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, | ||||
| 		systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, | ||||
| 	) | ||||
| 
 | ||||
| 	// Always enable accounting, this gets us the same behaviour as the fs implementation, | ||||
| 	// plus the kernel has some problems with joining the memory cgroup at a later time. | ||||
| 	properties = append(properties, | ||||
| 		systemd1.Property{"MemoryAccounting", dbus.MakeVariant(true)}, | ||||
| 		systemd1.Property{"CPUAccounting", dbus.MakeVariant(true)}, | ||||
| 		systemd1.Property{"BlockIOAccounting", dbus.MakeVariant(true)}) | ||||
| 
 | ||||
| 	if c.Memory != 0 { | ||||
| 		properties = append(properties, | ||||
| 			systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))}) | ||||
| 	} | ||||
| 	// TODO: MemoryReservation and MemorySwap not available in systemd | ||||
| 
 | ||||
| 	if c.CpuShares != 0 { | ||||
| 		properties = append(properties, | ||||
| 			systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))}) | ||||
| 	} | ||||
| 
 | ||||
| 	if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	// To work around the lack of /dev/pts/* support above we need to manually add these | ||||
| 	// so, ask systemd for the cgroup used | ||||
| 	props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	cgroup := props["ControlGroup"].(string) | ||||
| 
 | ||||
| 	if !c.AllowAllDevices { | ||||
| 		// Atm we can't use the systemd device support because of two missing things: | ||||
| 		// * Support for wildcards to allow mknod on any device | ||||
| 		// * Support for wildcards to allow /dev/pts support | ||||
| 		// | ||||
| 		// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is | ||||
| 		// in wide use. When both these are availalable we will be able to switch, but need to keep the old | ||||
| 		// implementation for backwards compat. | ||||
| 		// | ||||
| 		// Note: we can't use systemd to set up the initial limits, and then change the cgroup | ||||
| 		// because systemd will re-write the device settings if it needs to re-apply the cgroup context. | ||||
| 		// This happens at least for v208 when any sibling unit is started. | ||||
| 
 | ||||
| 		mountpoint, err := cgroups.FindCgroupMountpoint("devices") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		initPath, err := cgroups.GetInitCgroupDir("devices") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		dir := filepath.Join(mountpoint, initPath, c.Parent, c.Name) | ||||
| 
 | ||||
| 		res.cleanupDirs = append(res.cleanupDirs, dir) | ||||
| 
 | ||||
| 		if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err := ioutil.WriteFile(filepath.Join(dir, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err := writeFile(dir, "devices.deny", "a"); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		for _, dev := range c.AllowedDevices { | ||||
| 			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if len(cpuArgs) != 0 { | ||||
| 		mountpoint, err := cgroups.FindCgroupMountpoint("cpu") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		path := filepath.Join(mountpoint, cgroup) | ||||
| 
 | ||||
| 		for _, arg := range cpuArgs { | ||||
| 			if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if len(memoryArgs) != 0 { | ||||
| 		mountpoint, err := cgroups.FindCgroupMountpoint("memory") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		path := filepath.Join(mountpoint, cgroup) | ||||
| 
 | ||||
| 		for _, arg := range memoryArgs { | ||||
| 			if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// we need to manually join the freezer cgroup in systemd because it does not currently support it | ||||
| 	// via the dbus api | ||||
| 	freezerPath, err := joinFreezer(c, pid) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	res.cleanupDirs = append(res.cleanupDirs, freezerPath) | ||||
| 
 | ||||
| 	if len(cpusetArgs) != 0 { | ||||
| 		// systemd does not atm set up the cpuset controller, so we must manually | ||||
| 		// join it. Additionally that is a very finicky controller where each | ||||
| 		// level must have a full setup as the default for a new directory is "no cpus", | ||||
| 		// so we avoid using any hierarchies here, creating a toplevel directory. | ||||
| 		mountpoint, err := cgroups.FindCgroupMountpoint("cpuset") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		initPath, err := cgroups.GetInitCgroupDir("cpuset") | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		var ( | ||||
| 			foundCpus bool | ||||
| 			foundMems bool | ||||
| 
 | ||||
| 			rootPath = filepath.Join(mountpoint, initPath) | ||||
| 			path     = filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name) | ||||
| 		) | ||||
| 
 | ||||
| 		res.cleanupDirs = append(res.cleanupDirs, path) | ||||
| 
 | ||||
| 		if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		for _, arg := range cpusetArgs { | ||||
| 			if arg.File == "cpuset.cpus" { | ||||
| 				foundCpus = true | ||||
| 			} | ||||
| 			if arg.File == "cpuset.mems" { | ||||
| 				foundMems = true | ||||
| 			} | ||||
| 			if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		// These are required, if not specified inherit from parent | ||||
| 		if !foundCpus { | ||||
| 			s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus")) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 
 | ||||
| 			if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		// These are required, if not specified inherit from parent | ||||
| 		if !foundMems { | ||||
| 			s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems")) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 
 | ||||
| 			if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return &res, nil | ||||
| } | ||||
| 
 | ||||
// writeFile writes data to the file named file inside dir, creating it with
// mode 0700 if it does not exist, and returns any write error.
func writeFile(dir, file, data string) error {
	target := filepath.Join(dir, file)
	return ioutil.WriteFile(target, []byte(data), 0700)
}
| 
 | ||||
| func (c *systemdCgroup) Cleanup() error { | ||||
| 	// systemd cleans up, we don't need to do much | ||||
| 
 | ||||
| 	for _, path := range c.cleanupDirs { | ||||
| 		os.RemoveAll(path) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func joinFreezer(c *cgroups.Cgroup, pid int) (string, error) { | ||||
| 	path, err := getFreezerPath(c) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	return path, nil | ||||
| } | ||||
| 
 | ||||
| func getFreezerPath(c *cgroups.Cgroup) (string, error) { | ||||
| 	mountpoint, err := cgroups.FindCgroupMountpoint("freezer") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	initPath, err := cgroups.GetInitCgroupDir("freezer") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	return filepath.Join(mountpoint, initPath, fmt.Sprintf("%s-%s", c.Parent, c.Name)), nil | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { | ||||
| 	path, err := getFreezerPath(c) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if err := ioutil.WriteFile(filepath.Join(path, "freezer.state"), []byte(state), 0); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	for { | ||||
| 		state_, err := ioutil.ReadFile(filepath.Join(path, "freezer.state")) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if string(state) == string(bytes.TrimSpace(state_)) { | ||||
| 			break | ||||
| 		} | ||||
| 		time.Sleep(1 * time.Millisecond) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func GetPids(c *cgroups.Cgroup) ([]int, error) { | ||||
| 	unitName := getUnitName(c) | ||||
| 
 | ||||
| 	mountpoint, err := cgroups.FindCgroupMountpoint("cpu") | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	cgroup := props["ControlGroup"].(string) | ||||
| 
 | ||||
| 	return cgroups.ReadProcsFile(filepath.Join(mountpoint, cgroup)) | ||||
| } | ||||
| 
 | ||||
| func getUnitName(c *cgroups.Cgroup) string { | ||||
| 	return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) | ||||
| } | ||||
|  | @ -1,93 +0,0 @@ | |||
| package cgroups | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/mount" | ||||
| ) | ||||
| 
 | ||||
| // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt | ||||
| func FindCgroupMountpoint(subsystem string) (string, error) { | ||||
| 	mounts, err := mount.GetMounts() | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	for _, mount := range mounts { | ||||
| 		if mount.Fstype == "cgroup" { | ||||
| 			for _, opt := range strings.Split(mount.VfsOpts, ",") { | ||||
| 				if opt == subsystem { | ||||
| 					return mount.Mountpoint, nil | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return "", ErrNotFound | ||||
| } | ||||
| 
 | ||||
| // Returns the relative path to the cgroup docker is running in. | ||||
| func GetThisCgroupDir(subsystem string) (string, error) { | ||||
| 	f, err := os.Open("/proc/self/cgroup") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	return parseCgroupFile(subsystem, f) | ||||
| } | ||||
| 
 | ||||
| func GetInitCgroupDir(subsystem string) (string, error) { | ||||
| 	f, err := os.Open("/proc/1/cgroup") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	return parseCgroupFile(subsystem, f) | ||||
| } | ||||
| 
 | ||||
// ReadProcsFile reads dir/cgroup.procs and returns the pids listed in it, one
// per non-empty line.
//
// It returns an error when the file cannot be opened, when a line is not a
// valid integer, or when scanning fails part-way (I/O error or an over-long
// line); in those cases the returned slice is nil. A file without pids yields
// an empty, non-nil slice.
func ReadProcsFile(dir string) ([]int, error) {
	f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var (
		s   = bufio.NewScanner(f)
		out = []int{}
	)

	for s.Scan() {
		if t := s.Text(); t != "" {
			pid, err := strconv.Atoi(t)
			if err != nil {
				return nil, err
			}
			out = append(out, pid)
		}
	}
	// Scan also stops on a read error; without this check a failed read would
	// be indistinguishable from a short pid list.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
| 
 | ||||
| func parseCgroupFile(subsystem string, r io.Reader) (string, error) { | ||||
| 	s := bufio.NewScanner(r) | ||||
| 	for s.Scan() { | ||||
| 		if err := s.Err(); err != nil { | ||||
| 			return "", err | ||||
| 		} | ||||
| 		text := s.Text() | ||||
| 		parts := strings.Split(text, ":") | ||||
| 		for _, subs := range strings.Split(parts[1], ",") { | ||||
| 			if subs == subsystem { | ||||
| 				return parts[2], nil | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return "", ErrNotFound | ||||
| } | ||||
|  | @ -1,58 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package console | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/label" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
| // Setup initializes the proper /dev/console inside the rootfs path | ||||
| func Setup(rootfs, consolePath, mountLabel string) error { | ||||
| 	oldMask := system.Umask(0000) | ||||
| 	defer system.Umask(oldMask) | ||||
| 
 | ||||
| 	if err := os.Chmod(consolePath, 0600); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := os.Chown(consolePath, 0, 0); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := label.SetFileLabel(consolePath, mountLabel); err != nil { | ||||
| 		return fmt.Errorf("set file label %s %s", consolePath, err) | ||||
| 	} | ||||
| 
 | ||||
| 	dest := filepath.Join(rootfs, "dev/console") | ||||
| 
 | ||||
| 	f, err := os.Create(dest) | ||||
| 	if err != nil && !os.IsExist(err) { | ||||
| 		return fmt.Errorf("create %s %s", dest, err) | ||||
| 	} | ||||
| 	if f != nil { | ||||
| 		f.Close() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil { | ||||
| 		return fmt.Errorf("bind %s to %s %s", consolePath, dest, err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func OpenAndDup(consolePath string) error { | ||||
| 	slave, err := system.OpenTerminal(consolePath, syscall.O_RDWR) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("open terminal %s", err) | ||||
| 	} | ||||
| 	if err := system.Dup2(slave.Fd(), 0); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := system.Dup2(slave.Fd(), 1); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return system.Dup2(slave.Fd(), 2) | ||||
| } | ||||
|  | @ -1,113 +0,0 @@ | |||
| package libcontainer | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/devices" | ||||
| ) | ||||
| 
 | ||||
// Context is a generic set of key/value pairs that allows arbitrary data to
// be sent along with a configuration.
type Context map[string]string
| 
 | ||||
| // Container defines configuration options for executing a process inside a contained environment | ||||
| type Container struct { | ||||
| 	// Hostname optionally sets the container's hostname if provided | ||||
| 	Hostname string `json:"hostname,omitempty"` | ||||
| 
 | ||||
| 	// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted | ||||
| 	// bind mounts are writtable | ||||
| 	ReadonlyFs bool `json:"readonly_fs,omitempty"` | ||||
| 
 | ||||
| 	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs | ||||
| 	// This is a common option when the container is running in ramdisk | ||||
| 	NoPivotRoot bool `json:"no_pivot_root,omitempty"` | ||||
| 
 | ||||
| 	// User will set the uid and gid of the executing process running inside the container | ||||
| 	User string `json:"user,omitempty"` | ||||
| 
 | ||||
| 	// WorkingDir will change the processes current working directory inside the container's rootfs | ||||
| 	WorkingDir string `json:"working_dir,omitempty"` | ||||
| 
 | ||||
| 	// Env will populate the processes environment with the provided values | ||||
| 	// Any values from the parent processes will be cleared before the values | ||||
| 	// provided in Env are provided to the process | ||||
| 	Env []string `json:"environment,omitempty"` | ||||
| 
 | ||||
| 	// Tty when true will allocate a pty slave on the host for access by the container's process | ||||
| 	// and ensure that it is mounted inside the container's rootfs | ||||
| 	Tty bool `json:"tty,omitempty"` | ||||
| 
 | ||||
| 	// Namespaces specifies the container's namespaces that it should setup when cloning the init process | ||||
| 	// If a namespace is not provided that namespace is shared from the container's parent process | ||||
| 	Namespaces map[string]bool `json:"namespaces,omitempty"` | ||||
| 
 | ||||
| 	// Capabilities specify the capabilities to keep when executing the process inside the container | ||||
| 	// All capbilities not specified will be dropped from the processes capability mask | ||||
| 	Capabilities []string `json:"capabilities,omitempty"` | ||||
| 
 | ||||
| 	// Networks specifies the container's network setup to be created | ||||
| 	Networks []*Network `json:"networks,omitempty"` | ||||
| 
 | ||||
| 	// Routes can be specified to create entries in the route table as the container is started | ||||
| 	Routes []*Route `json:"routes,omitempty"` | ||||
| 
 | ||||
| 	// Cgroups specifies specific cgroup settings for the various subsystems that the container is | ||||
| 	// placed into to limit the resources the container has available | ||||
| 	Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` | ||||
| 
 | ||||
| 	// Context is a generic key value format that allows for additional settings to be passed | ||||
| 	// on the container's creation | ||||
| 	// This is commonly used to specify apparmor profiles, selinux labels, and different restrictions | ||||
| 	// placed on the container's processes | ||||
| 	Context Context `json:"context,omitempty"` | ||||
| 
 | ||||
| 	// Mounts specify additional source and destination paths that will be mounted inside the container's | ||||
| 	// rootfs and mount namespace if specified | ||||
| 	Mounts Mounts `json:"mounts,omitempty"` | ||||
| 
 | ||||
| 	// The device nodes that should be automatically created within the container upon container start.  Note, make sure that the node is marked as allowed in the cgroup as well! | ||||
| 	DeviceNodes []*devices.Device `json:"device_nodes,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // Network defines configuration for a container's networking stack | ||||
| // | ||||
| // The network configuration can be omited from a container causing the | ||||
| // container to be setup with the host's networking stack | ||||
| type Network struct { | ||||
| 	// Type sets the networks type, commonly veth and loopback | ||||
| 	Type string `json:"type,omitempty"` | ||||
| 
 | ||||
| 	// Context is a generic key value format for setting additional options that are specific to | ||||
| 	// the network type | ||||
| 	Context Context `json:"context,omitempty"` | ||||
| 
 | ||||
| 	// Address contains the IP and mask to set on the network interface | ||||
| 	Address string `json:"address,omitempty"` | ||||
| 
 | ||||
| 	// Gateway sets the gateway address that is used as the default for the interface | ||||
| 	Gateway string `json:"gateway,omitempty"` | ||||
| 
 | ||||
| 	// Mtu sets the mtu value for the interface and will be mirrored on both the host and | ||||
| 	// container's interfaces if a pair is created, specifically in the case of type veth | ||||
| 	Mtu int `json:"mtu,omitempty"` | ||||
| } | ||||
| 
 | ||||
// Route describes an entry to create in the route table as the container is
// started.
//
// Destination, source and gateway should all be either IPv4 or IPv6. One of
// the three must be present; omitted entries use their IP family default for
// the route table. For IPv4, for example, setting the gateway to 1.2.3.4 and
// the interface to eth0 sets up a standard destination of 0.0.0.0 (or *) when
// viewed in the route table.
type Route struct {
	// Destination is the destination and mask as a CIDR; IPv4 or IPv6.
	Destination string `json:"destination,omitempty"`

	// Source is the source and mask as a CIDR; IPv4 or IPv6.
	Source string `json:"source,omitempty"`

	// Gateway is the gateway address; IPv4 or IPv6.
	Gateway string `json:"gateway,omitempty"`

	// InterfaceName is the device the route is set up for, for example eth0.
	InterfaceName string `json:"interface_name,omitempty"`
}
|  | @ -1,107 +0,0 @@ | |||
| { | ||||
|   "namespaces": { | ||||
|     "NEWNET": true, | ||||
|     "NEWPID": true, | ||||
|     "NEWIPC": true, | ||||
|     "NEWUTS": true, | ||||
|     "NEWNS": true | ||||
|   }, | ||||
|   "networks": [ | ||||
|     { | ||||
|       "gateway": "localhost", | ||||
|       "type": "loopback", | ||||
|       "address": "127.0.0.1/0", | ||||
|       "mtu": 1500 | ||||
|     }, | ||||
|     { | ||||
|       "gateway": "172.17.42.1", | ||||
|       "context": { | ||||
|         "prefix": "veth", | ||||
|         "bridge": "docker0" | ||||
|       }, | ||||
|       "type": "veth", | ||||
|       "address": "172.17.42.2/16", | ||||
|       "mtu": 1500 | ||||
|     } | ||||
|   ], | ||||
|   "routes": [ | ||||
|     { | ||||
|       "gateway": "172.17.42.1", | ||||
|       "interface_name": "eth0" | ||||
|     }, | ||||
|     { | ||||
|       "destination": "192.168.0.0/24", | ||||
|       "interface_name": "eth0" | ||||
|     } | ||||
|   ], | ||||
|   "capabilities": [ | ||||
|     "MKNOD" | ||||
|   ], | ||||
|   "cgroups": { | ||||
|     "name": "docker-koye", | ||||
|     "parent": "docker" | ||||
|   }, | ||||
|   "hostname": "koye", | ||||
|   "environment": [ | ||||
|     "HOME=/", | ||||
|     "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", | ||||
|     "container=docker", | ||||
|     "TERM=xterm-256color" | ||||
|   ], | ||||
|   "tty": true, | ||||
|   "mounts": [ | ||||
|     { | ||||
|       "type": "devtmpfs" | ||||
|     } | ||||
|   ], | ||||
|   "device_nodes": [ | ||||
| 		{ | ||||
| 			"path":  "/dev/null", | ||||
| 			"type":        99, | ||||
| 			"major_number": 1, | ||||
| 			"minor_number": 3, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		}, | ||||
| 		{ | ||||
| 			"path":  "/dev/zero", | ||||
| 			"type":        99, | ||||
| 			"major_number": 1, | ||||
| 			"minor_number": 5, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		}, | ||||
| 		{ | ||||
| 			"path":  "/dev/full", | ||||
| 			"type":        99, | ||||
| 			"major_number": 1, | ||||
| 			"minor_number": 7, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		}, | ||||
| 		{ | ||||
| 			"path":  "/dev/tty", | ||||
| 			"type":        99, | ||||
| 			"major_number": 5, | ||||
| 			"minor_number": 0, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		}, | ||||
| 		{ | ||||
| 			"path":  "/dev/urandom", | ||||
| 			"type":        99, | ||||
| 			"major_number": 1, | ||||
| 			"minor_number": 9, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		}, | ||||
| 		{ | ||||
| 			"path":  "/dev/random", | ||||
| 			"type":        99, | ||||
| 			"major_number": 1, | ||||
| 			"minor_number": 8, | ||||
| 			"cgroup_permissions": "rwm", | ||||
| 			"file_mode": 438 | ||||
| 		} | ||||
|   ] | ||||
| } | ||||
|  | @ -1,69 +0,0 @@ | |||
| package libcontainer | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"os" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// contains reports whether expected occurs in values. The test below uses it
// to look up capability names.
func contains(expected string, values []string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == expected {
			return true
		}
	}
	return false
}
| 
 | ||||
| func TestContainerJsonFormat(t *testing.T) { | ||||
| 	f, err := os.Open("container.json") | ||||
| 	if err != nil { | ||||
| 		t.Fatal("Unable to open container.json") | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	var container *Container | ||||
| 	if err := json.NewDecoder(f).Decode(&container); err != nil { | ||||
| 		t.Fatalf("failed to decode container config: %s", err) | ||||
| 	} | ||||
| 	if container.Hostname != "koye" { | ||||
| 		t.Log("hostname is not set") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if !container.Tty { | ||||
| 		t.Log("tty should be set to true") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if len(container.Routes) != 2 { | ||||
| 		t.Log("should have found 2 routes") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if !container.Namespaces["NEWNET"] { | ||||
| 		t.Log("namespaces should contain NEWNET") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if container.Namespaces["NEWUSER"] { | ||||
| 		t.Log("namespaces should not contain NEWUSER") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if contains("SYS_ADMIN", container.Capabilities) { | ||||
| 		t.Log("SYS_ADMIN should not be enabled in capabilities mask") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if !contains("MKNOD", container.Capabilities) { | ||||
| 		t.Log("MKNOD should be enabled in capabilities mask") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 
 | ||||
| 	if contains("SYS_CHROOT", container.Capabilities) { | ||||
| 		t.Log("capabilities mask should not contain SYS_CHROOT") | ||||
| 		t.Fail() | ||||
| 	} | ||||
| } | ||||
|  | @ -1,159 +0,0 @@ | |||
| package devices | ||||
| 
 | ||||
var (
	// DefaultSimpleDevices are devices that are to be both allowed and
	// created. The list is appended to both DefaultAllowedDevices and
	// DefaultAutoCreatedDevices below.
	DefaultSimpleDevices = []*Device{
		// /dev/null and zero
		{
			Path:              "/dev/null",
			Type:              'c',
			MajorNumber:       1,
			MinorNumber:       3,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},
		{
			Path:              "/dev/zero",
			Type:              'c',
			MajorNumber:       1,
			MinorNumber:       5,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},

		{
			Path:              "/dev/full",
			Type:              'c',
			MajorNumber:       1,
			MinorNumber:       7,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},

		// consoles and ttys
		{
			Path:              "/dev/tty",
			Type:              'c',
			MajorNumber:       5,
			MinorNumber:       0,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},

		// /dev/urandom,/dev/random
		{
			Path:              "/dev/urandom",
			Type:              'c',
			MajorNumber:       1,
			MinorNumber:       9,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},
		{
			Path:              "/dev/random",
			Type:              'c',
			MajorNumber:       1,
			MinorNumber:       8,
			CgroupPermissions: "rwm",
			FileMode:          0666,
		},
	}

	// DefaultAllowedDevices is the default allow list: the entries below
	// followed by every entry of DefaultSimpleDevices. Entries with an empty
	// Path only describe a device number (possibly wildcarded).
	DefaultAllowedDevices = append([]*Device{
		// allow mknod for any device
		{
			Type:              'c',
			MajorNumber:       Wildcard,
			MinorNumber:       Wildcard,
			CgroupPermissions: "m",
		},
		{
			Type:              'b',
			MajorNumber:       Wildcard,
			MinorNumber:       Wildcard,
			CgroupPermissions: "m",
		},

		{
			Path:              "/dev/console",
			Type:              'c',
			MajorNumber:       5,
			MinorNumber:       1,
			CgroupPermissions: "rwm",
		},
		{
			Path:              "/dev/tty0",
			Type:              'c',
			MajorNumber:       4,
			MinorNumber:       0,
			CgroupPermissions: "rwm",
		},
		{
			Path:              "/dev/tty1",
			Type:              'c',
			MajorNumber:       4,
			MinorNumber:       1,
			CgroupPermissions: "rwm",
		},
		// /dev/pts/ - pts namespaces are "coming soon"
		{
			Path:              "",
			Type:              'c',
			MajorNumber:       136,
			MinorNumber:       Wildcard,
			CgroupPermissions: "rwm",
		},
		{
			Path:              "",
			Type:              'c',
			MajorNumber:       5,
			MinorNumber:       2,
			CgroupPermissions: "rwm",
		},

		// tuntap
		{
			Path:              "",
			Type:              'c',
			MajorNumber:       10,
			MinorNumber:       200,
			CgroupPermissions: "rwm",
		},

		/*// fuse
		   {
		    Path: "",
		    Type: 'c',
		    MajorNumber: 10,
		    MinorNumber: 229,
		    CgroupPermissions: "rwm",
		   },

		// rtc
		   {
		    Path: "",
		    Type: 'c',
		    MajorNumber: 254,
		    MinorNumber: 0,
		    CgroupPermissions: "rwm",
		   },
		*/
	}, DefaultSimpleDevices...)

	// DefaultAutoCreatedDevices is the default list of device nodes to
	// create: /dev/fuse followed by every entry of DefaultSimpleDevices.
	DefaultAutoCreatedDevices = append([]*Device{
		{
			// /dev/fuse is created but not allowed.
			// This is to allow java to work, because java
			// insists on there being a /dev/fuse.
			// https://github.com/dotcloud/docker/issues/514
			// https://github.com/dotcloud/docker/issues/2393
			//
			Path:              "/dev/fuse",
			Type:              'c',
			MajorNumber:       10,
			MinorNumber:       229,
			CgroupPermissions: "rwm",
		},
	}, DefaultSimpleDevices...)
)
|  | @ -1,119 +0,0 @@ | |||
| package devices | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
const (
	// Wildcard is the major/minor number value that stands for "any";
	// GetDeviceNumberString renders it as "*".
	Wildcard = -1
)
| 
 | ||||
var (
	// ErrNotADeviceNode is returned by GetDevice when the path exists but
	// its file mode does not have os.ModeDevice set.
	ErrNotADeviceNode = errors.New("not a device node")
)
| 
 | ||||
// Device describes a linux device node together with the cgroup permissions
// granted for it. Type holds 'c' for character devices and 'b' for block
// devices.
type Device struct {
	Type              rune        `json:"type,omitempty"`
	Path              string      `json:"path,omitempty"`               // It is fine if this is an empty string in the case that you are using Wildcards
	MajorNumber       int64       `json:"major_number,omitempty"`       // Use the wildcard constant for wildcards.
	MinorNumber       int64       `json:"minor_number,omitempty"`       // Use the wildcard constant for wildcards.
	CgroupPermissions string      `json:"cgroup_permissions,omitempty"` // Typically just "rwm"
	FileMode          os.FileMode `json:"file_mode,omitempty"`          // The permission bits of the file's mode
}
| 
 | ||||
| func GetDeviceNumberString(deviceNumber int64) string { | ||||
| 	if deviceNumber == Wildcard { | ||||
| 		return "*" | ||||
| 	} else { | ||||
| 		return fmt.Sprintf("%d", deviceNumber) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (device *Device) GetCgroupAllowString() string { | ||||
| 	return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions) | ||||
| } | ||||
| 
 | ||||
| // Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct. | ||||
| func GetDevice(path string, cgroupPermissions string) (*Device, error) { | ||||
| 	fileInfo, err := os.Stat(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	var ( | ||||
| 		devType                rune | ||||
| 		mode                   = fileInfo.Mode() | ||||
| 		fileModePermissionBits = os.FileMode.Perm(mode) | ||||
| 	) | ||||
| 
 | ||||
| 	switch { | ||||
| 	case mode&os.ModeDevice == 0: | ||||
| 		return nil, ErrNotADeviceNode | ||||
| 	case mode&os.ModeCharDevice != 0: | ||||
| 		fileModePermissionBits |= syscall.S_IFCHR | ||||
| 		devType = 'c' | ||||
| 	default: | ||||
| 		fileModePermissionBits |= syscall.S_IFBLK | ||||
| 		devType = 'b' | ||||
| 	} | ||||
| 
 | ||||
| 	stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) | ||||
| 	if !ok { | ||||
| 		return nil, fmt.Errorf("cannot determine the device number for device %s", path) | ||||
| 	} | ||||
| 	devNumber := int(stat_t.Rdev) | ||||
| 
 | ||||
| 	return &Device{ | ||||
| 		Type:              devType, | ||||
| 		Path:              path, | ||||
| 		MajorNumber:       Major(devNumber), | ||||
| 		MinorNumber:       Minor(devNumber), | ||||
| 		CgroupPermissions: cgroupPermissions, | ||||
| 		FileMode:          fileModePermissionBits, | ||||
| 	}, nil | ||||
| } | ||||
| 
 | ||||
| func GetHostDeviceNodes() ([]*Device, error) { | ||||
| 	return getDeviceNodes("/dev") | ||||
| } | ||||
| 
 | ||||
| func getDeviceNodes(path string) ([]*Device, error) { | ||||
| 	files, err := ioutil.ReadDir(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	out := []*Device{} | ||||
| 	for _, f := range files { | ||||
| 		if f.IsDir() { | ||||
| 			switch f.Name() { | ||||
| 			case "pts", "shm", "fd": | ||||
| 				continue | ||||
| 			default: | ||||
| 				sub, err := getDeviceNodes(filepath.Join(path, f.Name())) | ||||
| 				if err != nil { | ||||
| 					return nil, err | ||||
| 				} | ||||
| 
 | ||||
| 				out = append(out, sub...) | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		device, err := GetDevice(filepath.Join(path, f.Name()), "rwm") | ||||
| 		if err != nil { | ||||
| 			if err == ErrNotADeviceNode { | ||||
| 				continue | ||||
| 			} | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		out = append(out, device) | ||||
| 	} | ||||
| 
 | ||||
| 	return out, nil | ||||
| } | ||||
|  | @ -1,26 +0,0 @@ | |||
| package devices | ||||
| 
 | ||||
| /* | ||||
| 
 | ||||
| This code provides support for manipulating linux device numbers.  It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved. | ||||
| 
 | ||||
| You can read what they are here: | ||||
| 
 | ||||
|  - http://www.makelinux.net/ldd3/chp-3-sect-2 | ||||
|  - http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94 | ||||
| 
 | ||||
| Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9 | ||||
| 
 | ||||
| */ | ||||
| 
 | ||||
// Major extracts the major number from a device number: bits 8-19.
func Major(devNumber int) int64 {
	const majorMask = 0xfff
	shifted := devNumber >> 8
	return int64(shifted & majorMask)
}
| 
 | ||||
// Minor extracts the minor number from a device number: the low 8 bits
// combined with bits 20 and up, which are shifted down to sit above them.
func Minor(devNumber int) int64 {
	low := devNumber & 0xff
	high := (devNumber >> 12) & 0xfff00
	return int64(low | high)
}
| 
 | ||||
// Mkdev packs a major and minor number into a device number, the inverse of
// Major and Minor: the major is shifted left by 8, the low 8 bits of the
// minor occupy bits 0-7 and the remaining minor bits are placed from bit 20.
func Mkdev(majorNumber int64, minorNumber int64) int {
	dev := majorNumber << 8
	dev |= minorNumber & 0xff
	dev |= (minorNumber & 0xfff00) << 12
	return int(dev)
}
|  | @ -1,30 +0,0 @@ | |||
| // +build !selinux !linux | ||||
| 
 | ||||
| package label | ||||
| 
 | ||||
// GenLabels is the stub used when the binary is built without the selinux
// tag or for a non-linux target: it ignores options and returns empty
// process and mount labels with a nil error.
func GenLabels(options string) (string, string, error) {
	return "", "", nil
}
| 
 | ||||
// FormatMountLabel is the non-SELinux stub: it returns src unchanged and
// ignores mountLabel.
func FormatMountLabel(src string, mountLabel string) string {
	return src
}
| 
 | ||||
// SetProcessLabel is the non-SELinux stub: it does nothing and returns nil.
func SetProcessLabel(processLabel string) error {
	return nil
}
| 
 | ||||
// SetFileLabel is the non-SELinux stub: it does nothing and returns nil.
func SetFileLabel(path string, fileLabel string) error {
	return nil
}
| 
 | ||||
// GetPidCon is the non-SELinux stub: it returns an empty label and a nil
// error for any pid.
func GetPidCon(pid int) (string, error) {
	return "", nil
}
| 
 | ||||
// Init is the non-SELinux stub: there is nothing to initialize.
func Init() {
}
| 
 | ||||
// ReserveLabel is the non-SELinux stub: it does nothing and returns nil.
func ReserveLabel(label string) error {
	return nil
}
|  | @ -1,83 +0,0 @@ | |||
| // +build selinux,linux | ||||
| 
 | ||||
| package label | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/selinux" | ||||
| ) | ||||
| 
 | ||||
| func GenLabels(options string) (string, string, error) { | ||||
| 	if !selinux.SelinuxEnabled() { | ||||
| 		return "", "", nil | ||||
| 	} | ||||
| 	var err error | ||||
| 	processLabel, mountLabel := selinux.GetLxcContexts() | ||||
| 	if processLabel != "" { | ||||
| 		var ( | ||||
| 			s = strings.Fields(options) | ||||
| 			l = len(s) | ||||
| 		) | ||||
| 		if l > 0 { | ||||
| 			pcon := selinux.NewContext(processLabel) | ||||
| 			for i := 0; i < l; i++ { | ||||
| 				o := strings.Split(s[i], "=") | ||||
| 				pcon[o[0]] = o[1] | ||||
| 			} | ||||
| 			processLabel = pcon.Get() | ||||
| 			mountLabel, err = selinux.CopyLevel(processLabel, mountLabel) | ||||
| 		} | ||||
| 	} | ||||
| 	return processLabel, mountLabel, err | ||||
| } | ||||
| 
 | ||||
// FormatMountLabel appends a quoted context=<mountLabel> option to the mount
// data string src. An empty mountLabel leaves src untouched; an empty src
// yields just the context option without a leading comma.
func FormatMountLabel(src, mountLabel string) string {
	if mountLabel == "" {
		return src
	}
	if src == "" {
		return fmt.Sprintf("context=%q", mountLabel)
	}
	return fmt.Sprintf("%s,context=%q", src, mountLabel)
}
| 
 | ||||
| func SetProcessLabel(processLabel string) error { | ||||
| 	if selinux.SelinuxEnabled() { | ||||
| 		return selinux.Setexeccon(processLabel) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func GetProcessLabel() (string, error) { | ||||
| 	if selinux.SelinuxEnabled() { | ||||
| 		return selinux.Getexeccon() | ||||
| 	} | ||||
| 	return "", nil | ||||
| } | ||||
| 
 | ||||
| func SetFileLabel(path string, fileLabel string) error { | ||||
| 	if selinux.SelinuxEnabled() && fileLabel != "" { | ||||
| 		return selinux.Setfilecon(path, fileLabel) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func GetPidCon(pid int) (string, error) { | ||||
| 	if !selinux.SelinuxEnabled() { | ||||
| 		return "", nil | ||||
| 	} | ||||
| 	return selinux.Getpidcon(pid) | ||||
| } | ||||
| 
 | ||||
| func Init() { | ||||
| 	selinux.SelinuxEnabled() | ||||
| } | ||||
| 
 | ||||
// ReserveLabel forwards label to selinux.ReserveLabel. It always returns
// nil; the error result exists to match the non-SELinux stub's signature.
func ReserveLabel(label string) error {
	selinux.ReserveLabel(label)
	return nil
}
|  | @ -1,201 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/label" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" | ||||
| 	"github.com/dotcloud/docker/pkg/symlink" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
// defaultMountFlags are the default mount point flags (noexec, nosuid,
// nodev) used for the proc, sysfs and shm mounts in this package.
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
| 
 | ||||
// mount describes one system mount performed by mountSystem; the fields are
// handed to system.Mount in the order source, path, device, flags, data.
type mount struct {
	source string // mount source
	path   string // mount target; created with MkdirAll before mounting
	device string // filesystem type
	flags  int    // mount flags
	data   string // filesystem-specific mount data
}
| 
 | ||||
| // InitializeMountNamespace setups up the devices, mount points, and filesystems for use inside a | ||||
| // new mount namepsace | ||||
| func InitializeMountNamespace(rootfs, console string, container *libcontainer.Container) error { | ||||
| 	var ( | ||||
| 		err  error | ||||
| 		flag = syscall.MS_PRIVATE | ||||
| 	) | ||||
| 	if container.NoPivotRoot { | ||||
| 		flag = syscall.MS_SLAVE | ||||
| 	} | ||||
| 	if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil { | ||||
| 		return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err) | ||||
| 	} | ||||
| 	if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { | ||||
| 		return fmt.Errorf("mouting %s as bind %s", rootfs, err) | ||||
| 	} | ||||
| 	if err := mountSystem(rootfs, container); err != nil { | ||||
| 		return fmt.Errorf("mount system %s", err) | ||||
| 	} | ||||
| 	if err := setupBindmounts(rootfs, container.Mounts); err != nil { | ||||
| 		return fmt.Errorf("bind mounts %s", err) | ||||
| 	} | ||||
| 	if err := nodes.CreateDeviceNodes(rootfs, container.DeviceNodes); err != nil { | ||||
| 		return fmt.Errorf("create device nodes %s", err) | ||||
| 	} | ||||
| 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := setupDevSymlinks(rootfs); err != nil { | ||||
| 		return fmt.Errorf("dev symlinks %s", err) | ||||
| 	} | ||||
| 	if err := system.Chdir(rootfs); err != nil { | ||||
| 		return fmt.Errorf("chdir into %s %s", rootfs, err) | ||||
| 	} | ||||
| 
 | ||||
| 	if container.NoPivotRoot { | ||||
| 		err = MsMoveRoot(rootfs) | ||||
| 	} else { | ||||
| 		err = PivotRoot(rootfs) | ||||
| 	} | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if container.ReadonlyFs { | ||||
| 		if err := SetReadonly(); err != nil { | ||||
| 			return fmt.Errorf("set readonly %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	system.Umask(0022) | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts | ||||
| // inside the mount namespace | ||||
| func mountSystem(rootfs string, container *libcontainer.Container) error { | ||||
| 	for _, m := range newSystemMounts(rootfs, container.Context["mount_label"], container.Mounts) { | ||||
| 		if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { | ||||
| 			return fmt.Errorf("mkdirall %s %s", m.path, err) | ||||
| 		} | ||||
| 		if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { | ||||
| 			return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
// createIfNotExists makes sure something exists at path: a directory when
// isDir is true, otherwise an empty file (parent directories are created as
// needed). If path already exists, or stat fails for any reason other than
// "does not exist", nothing is done and nil is returned.
func createIfNotExists(path string, isDir bool) error {
	_, statErr := os.Stat(path)
	if statErr == nil || !os.IsNotExist(statErr) {
		return nil
	}

	if isDir {
		return os.MkdirAll(path, 0755)
	}

	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return err
	}
	file, err := os.OpenFile(path, os.O_CREATE, 0755)
	if err != nil {
		return err
	}
	file.Close()
	return nil
}
| 
 | ||||
// setupDevSymlinks creates the /dev/fd, /dev/stdin, /dev/stdout and
// /dev/stderr symlinks inside rootfs, each pointing at its /proc/self/fd
// counterpart, plus /dev/kcore when /proc/kcore exists. A symlink that
// already exists is not an error.
func setupDevSymlinks(rootfs string) error {
	type devLink struct {
		target string
		name   string
	}

	links := []devLink{
		{"/proc/self/fd", "/dev/fd"},
		{"/proc/self/fd/0", "/dev/stdin"},
		{"/proc/self/fd/1", "/dev/stdout"},
		{"/proc/self/fd/2", "/dev/stderr"},
	}

	// kcore support can be toggled with CONFIG_PROC_KCORE; only create a
	// symlink in /dev if it exists in /proc.
	if _, err := os.Stat("/proc/kcore"); err == nil {
		links = append(links, devLink{"/proc/kcore", "/dev/kcore"})
	}

	for _, l := range links {
		dst := filepath.Join(rootfs, l.name)
		err := os.Symlink(l.target, dst)
		if err == nil || os.IsExist(err) {
			continue
		}
		return fmt.Errorf("symlink %s %s %s", l.target, dst, err)
	}

	return nil
}
| 
 | ||||
| func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error { | ||||
| 	for _, m := range bindMounts.OfType("bind") { | ||||
| 		var ( | ||||
| 			flags = syscall.MS_BIND | syscall.MS_REC | ||||
| 			dest  = filepath.Join(rootfs, m.Destination) | ||||
| 		) | ||||
| 		if !m.Writable { | ||||
| 			flags = flags | syscall.MS_RDONLY | ||||
| 		} | ||||
| 
 | ||||
| 		stat, err := os.Stat(m.Source) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		dest, err = symlink.FollowSymlinkInScope(dest, rootfs) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		if err := createIfNotExists(dest, stat.IsDir()); err != nil { | ||||
| 			return fmt.Errorf("Creating new bind-mount target, %s", err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil { | ||||
| 			return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err) | ||||
| 		} | ||||
| 		if !m.Writable { | ||||
| 			if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil { | ||||
| 				return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err) | ||||
| 			} | ||||
| 		} | ||||
| 		if m.Private { | ||||
| 			if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { | ||||
| 				return fmt.Errorf("mounting %s private %s", dest, err) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // TODO: this is crappy right now and should be cleaned up with a better way of handling system and | ||||
| // standard bind mounts allowing them to be more dynamic | ||||
| func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount { | ||||
| 	systemMounts := []mount{ | ||||
| 		{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, | ||||
| 		{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, | ||||
| 		{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}, | ||||
| 		{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, | ||||
| 		{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, | ||||
| 	} | ||||
| 
 | ||||
| 	return systemMounts | ||||
| } | ||||
|  | @ -1,19 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| func MsMoveRoot(rootfs string) error { | ||||
| 	if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { | ||||
| 		return fmt.Errorf("mount move %s into / %s", rootfs, err) | ||||
| 	} | ||||
| 	if err := system.Chroot("."); err != nil { | ||||
| 		return fmt.Errorf("chroot . %s", err) | ||||
| 	} | ||||
| 	return system.Chdir("/") | ||||
| } | ||||
|  | @ -1,53 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package nodes | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/devices" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
| // Create the device nodes in the container. | ||||
| func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { | ||||
| 	oldMask := system.Umask(0000) | ||||
| 	defer system.Umask(oldMask) | ||||
| 
 | ||||
| 	for _, node := range nodesToCreate { | ||||
| 		if err := CreateDeviceNode(rootfs, node); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Creates the device node in the rootfs of the container. | ||||
| func CreateDeviceNode(rootfs string, node *devices.Device) error { | ||||
| 	var ( | ||||
| 		dest   = filepath.Join(rootfs, node.Path) | ||||
| 		parent = filepath.Dir(dest) | ||||
| 	) | ||||
| 
 | ||||
| 	if err := os.MkdirAll(parent, 0755); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	fileMode := node.FileMode | ||||
| 	switch node.Type { | ||||
| 	case 'c': | ||||
| 		fileMode |= syscall.S_IFCHR | ||||
| 	case 'b': | ||||
| 		fileMode |= syscall.S_IFBLK | ||||
| 	default: | ||||
| 		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := system.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) { | ||||
| 		return fmt.Errorf("mknod %s %s", node.Path, err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,12 +0,0 @@ | |||
| // +build !linux | ||||
| 
 | ||||
| package nodes | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/devices" | ||||
| ) | ||||
| 
 | ||||
// CreateDeviceNodes is the non-linux stub: it creates nothing and always
// returns libcontainer.ErrUnsupported.
func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error {
	return libcontainer.ErrUnsupported
}
|  | @ -1,31 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| func PivotRoot(rootfs string) error { | ||||
| 	pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root") | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err) | ||||
| 	} | ||||
| 	if err := system.Pivotroot(rootfs, pivotDir); err != nil { | ||||
| 		return fmt.Errorf("pivot_root %s", err) | ||||
| 	} | ||||
| 	if err := system.Chdir("/"); err != nil { | ||||
| 		return fmt.Errorf("chdir / %s", err) | ||||
| 	} | ||||
| 	// path to pivot dir now changed, update | ||||
| 	pivotDir = filepath.Join("/", filepath.Base(pivotDir)) | ||||
| 	if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { | ||||
| 		return fmt.Errorf("unmount pivot_root dir %s", err) | ||||
| 	} | ||||
| 	return os.Remove(pivotDir) | ||||
| } | ||||
|  | @ -1,26 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/console" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| ) | ||||
| 
 | ||||
| func SetupPtmx(rootfs, consolePath, mountLabel string) error { | ||||
| 	ptmx := filepath.Join(rootfs, "dev/ptmx") | ||||
| 	if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := os.Symlink("pts/ptmx", ptmx); err != nil { | ||||
| 		return fmt.Errorf("symlink dev ptmx %s", err) | ||||
| 	} | ||||
| 	if consolePath != "" { | ||||
| 		if err := console.Setup(rootfs, consolePath, mountLabel); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,12 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| func SetReadonly() error { | ||||
| 	return system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") | ||||
| } | ||||
|  | @ -1,31 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package mount | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| func RemountProc() error { | ||||
| 	if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func RemountSys() error { | ||||
| 	if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil { | ||||
| 		if err != syscall.EINVAL { | ||||
| 			return err | ||||
| 		} | ||||
| 	} else { | ||||
| 		if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,10 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
// CreateCommand builds the *exec.Cmd that starts a container's init process.
// Exec calls it with the container configuration, the console path, the
// rootfs, the data path, the init binary (os.Args[0]), the child end of the
// sync pipe and the user's arguments.
type CreateCommand func(container *libcontainer.Container, console, rootfs, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd
|  | @ -1,176 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups/fs" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups/systemd" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/network" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
| // Exec performes setup outside of a namespace so that a container can be | ||||
| // executed.  Exec is a high level function for working with container namespaces. | ||||
| func Exec(container *libcontainer.Container, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { | ||||
| 	var ( | ||||
| 		master  *os.File | ||||
| 		console string | ||||
| 		err     error | ||||
| 	) | ||||
| 
 | ||||
| 	// create a pipe so that we can syncronize with the namespaced process and | ||||
| 	// pass the veth name to the child | ||||
| 	syncPipe, err := NewSyncPipe() | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 
 | ||||
| 	if container.Tty { | ||||
| 		master, console, err = system.CreateMasterAndConsole() | ||||
| 		if err != nil { | ||||
| 			return -1, err | ||||
| 		} | ||||
| 		term.SetMaster(master) | ||||
| 	} | ||||
| 
 | ||||
| 	command := createCommand(container, console, rootfs, dataPath, os.Args[0], syncPipe.child, args) | ||||
| 
 | ||||
| 	if err := term.Attach(command); err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	defer term.Close() | ||||
| 
 | ||||
| 	if err := command.Start(); err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 
 | ||||
| 	started, err := system.GetProcessStartTime(command.Process.Pid) | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	if err := WritePid(dataPath, command.Process.Pid, started); err != nil { | ||||
| 		command.Process.Kill() | ||||
| 		command.Wait() | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	defer DeletePid(dataPath) | ||||
| 
 | ||||
| 	// Do this before syncing with child so that no children | ||||
| 	// can escape the cgroup | ||||
| 	cleaner, err := SetupCgroups(container, command.Process.Pid) | ||||
| 	if err != nil { | ||||
| 		command.Process.Kill() | ||||
| 		command.Wait() | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	if cleaner != nil { | ||||
| 		defer cleaner.Cleanup() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { | ||||
| 		command.Process.Kill() | ||||
| 		command.Wait() | ||||
| 		return -1, err | ||||
| 	} | ||||
| 
 | ||||
| 	// Sync with child | ||||
| 	syncPipe.Close() | ||||
| 
 | ||||
| 	if startCallback != nil { | ||||
| 		startCallback() | ||||
| 	} | ||||
| 
 | ||||
| 	if err := command.Wait(); err != nil { | ||||
| 		if _, ok := err.(*exec.ExitError); !ok { | ||||
| 			return -1, err | ||||
| 		} | ||||
| 	} | ||||
| 	return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil | ||||
| } | ||||
| 
 | ||||
| // DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces | ||||
| // defined on the container's configuration and use the current binary as the init with the | ||||
| // args provided | ||||
| // | ||||
| // console: the /dev/console to setup inside the container | ||||
| // init: the progam executed inside the namespaces | ||||
| // root: the path to the container json file and information | ||||
| // pipe: sync pipe to syncronize the parent and child processes | ||||
| // args: the arguemnts to pass to the container to run as the user's program | ||||
| func DefaultCreateCommand(container *libcontainer.Container, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { | ||||
| 	// get our binary name from arg0 so we can always reexec ourself | ||||
| 	env := []string{ | ||||
| 		"console=" + console, | ||||
| 		"pipe=3", | ||||
| 		"data_path=" + dataPath, | ||||
| 	} | ||||
| 
 | ||||
| 	/* | ||||
| 	   TODO: move user and wd into env | ||||
| 	   if user != "" { | ||||
| 	       env = append(env, "user="+user) | ||||
| 	   } | ||||
| 	   if workingDir != "" { | ||||
| 	       env = append(env, "wd="+workingDir) | ||||
| 	   } | ||||
| 	*/ | ||||
| 
 | ||||
| 	command := exec.Command(init, append([]string{"init"}, args...)...) | ||||
| 	// make sure the process is executed inside the context of the rootfs | ||||
| 	command.Dir = rootfs | ||||
| 	command.Env = append(os.Environ(), env...) | ||||
| 
 | ||||
| 	system.SetCloneFlags(command, uintptr(GetNamespaceFlags(container.Namespaces))) | ||||
| 	command.SysProcAttr.Pdeathsig = syscall.SIGKILL | ||||
| 	command.ExtraFiles = []*os.File{pipe} | ||||
| 
 | ||||
| 	return command | ||||
| } | ||||
| 
 | ||||
| // SetupCgroups applies the cgroup restrictions to the process running in the contaienr based | ||||
| // on the container's configuration | ||||
| func SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) { | ||||
| 	if container.Cgroups != nil { | ||||
| 		c := container.Cgroups | ||||
| 		if systemd.UseSystemd() { | ||||
| 			return systemd.Apply(c, nspid) | ||||
| 		} | ||||
| 		return fs.Apply(c, nspid) | ||||
| 	} | ||||
| 	return nil, nil | ||||
| } | ||||
| 
 | ||||
| // InitializeNetworking creates the container's network stack outside of the namespace and moves | ||||
| // interfaces into the container's net namespaces if necessary | ||||
| func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { | ||||
| 	context := libcontainer.Context{} | ||||
| 	for _, config := range container.Networks { | ||||
| 		strategy, err := network.GetStrategy(config.Type) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if err := strategy.Create(config, nspid, context); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return pipe.SendToChild(context) | ||||
| } | ||||
| 
 | ||||
| // GetNamespaceFlags parses the container's Namespaces options to set the correct | ||||
| // flags on clone, unshare, and setns | ||||
| func GetNamespaceFlags(namespaces map[string]bool) (flag int) { | ||||
| 	for key, enabled := range namespaces { | ||||
| 		if enabled { | ||||
| 			if ns := libcontainer.GetNamespace(key); ns != nil { | ||||
| 				flag |= ns.Value | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return flag | ||||
| } | ||||
|  | @ -1,56 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/label" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
| // ExecIn uses an existing pid and joins the pid's namespaces with the new command. | ||||
| func ExecIn(container *libcontainer.Container, nspid int, args []string) error { | ||||
| 	// TODO(vmarmol): If this gets too long, send it over a pipe to the child. | ||||
| 	// Marshall the container into JSON since it won't be available in the namespace. | ||||
| 	containerJson, err := json.Marshal(container) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// TODO(vmarmol): Move this to the container JSON. | ||||
| 	processLabel, err := label.GetPidCon(nspid) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// Enter the namespace and then finish setup | ||||
| 	finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)} | ||||
| 	finalArgs = append(finalArgs, args...) | ||||
| 	if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	panic("unreachable") | ||||
| } | ||||
| 
 | ||||
| // NsEnter is run after entering the namespace. | ||||
| func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error { | ||||
| 	// clear the current processes env and replace it with the environment | ||||
| 	// defined on the container | ||||
| 	if err := LoadContainerEnvironment(container); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := FinalizeNamespace(container); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(processLabel); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := system.Execv(args[0], args[0:], container.Env); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	panic("unreachable") | ||||
| } | ||||
|  | @ -1,235 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"runtime" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/apparmor" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/console" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/label" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/mount" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/netlink" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/network" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/utils" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"github.com/dotcloud/docker/pkg/user" | ||||
| ) | ||||
| 
 | ||||
| // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, | ||||
| // and other options required for the new container. | ||||
| func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error { | ||||
| 	rootfs, err := utils.ResolveRootfs(uncleanRootfs) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// clear the current processes env and replace it with the environment | ||||
| 	// defined on the container | ||||
| 	if err := LoadContainerEnvironment(container); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// We always read this as it is a way to sync with the parent as well | ||||
| 	context, err := syncPipe.ReadFromParent() | ||||
| 	if err != nil { | ||||
| 		syncPipe.Close() | ||||
| 		return err | ||||
| 	} | ||||
| 	syncPipe.Close() | ||||
| 
 | ||||
| 	if consolePath != "" { | ||||
| 		if err := console.OpenAndDup(consolePath); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if _, err := system.Setsid(); err != nil { | ||||
| 		return fmt.Errorf("setsid %s", err) | ||||
| 	} | ||||
| 	if consolePath != "" { | ||||
| 		if err := system.Setctty(); err != nil { | ||||
| 			return fmt.Errorf("setctty %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := setupNetwork(container, context); err != nil { | ||||
| 		return fmt.Errorf("setup networking %s", err) | ||||
| 	} | ||||
| 	if err := setupRoute(container); err != nil { | ||||
| 		return fmt.Errorf("setup route %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	label.Init() | ||||
| 
 | ||||
| 	if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil { | ||||
| 		return fmt.Errorf("setup mount namespace %s", err) | ||||
| 	} | ||||
| 	if container.Hostname != "" { | ||||
| 		if err := system.Sethostname(container.Hostname); err != nil { | ||||
| 			return fmt.Errorf("sethostname %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	runtime.LockOSThread() | ||||
| 
 | ||||
| 	if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil { | ||||
| 		return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err) | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { | ||||
| 		return fmt.Errorf("set process label %s", err) | ||||
| 	} | ||||
| 	if container.Context["restrictions"] != "" { | ||||
| 		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	pdeathSignal, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get parent death signal %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := FinalizeNamespace(container); err != nil { | ||||
| 		return fmt.Errorf("finalize namespace %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// FinalizeNamespace can change user/group which clears the parent death | ||||
| 	// signal, so we restore it here. | ||||
| 	if err := RestoreParentDeathSignal(pdeathSignal); err != nil { | ||||
| 		return fmt.Errorf("restore parent death signal %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	return system.Execv(args[0], args[0:], container.Env) | ||||
| } | ||||
| 
 | ||||
| // RestoreParentDeathSignal sets the parent death signal to old. | ||||
| func RestoreParentDeathSignal(old int) error { | ||||
| 	if old == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	current, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get parent death signal %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if old == current { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	if err := system.ParentDeathSignal(uintptr(old)); err != nil { | ||||
| 		return fmt.Errorf("set parent death signal %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// Signal self if parent is already dead. Does nothing if running in a new | ||||
| 	// PID namespace, as Getppid will always return 0. | ||||
| 	if syscall.Getppid() == 1 { | ||||
| 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // SetupUser changes the groups, gid, and uid for the user inside the container | ||||
| func SetupUser(u string) error { | ||||
| 	uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid()) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get supplementary groups %s", err) | ||||
| 	} | ||||
| 	if err := system.Setgroups(suppGids); err != nil { | ||||
| 		return fmt.Errorf("setgroups %s", err) | ||||
| 	} | ||||
| 	if err := system.Setgid(gid); err != nil { | ||||
| 		return fmt.Errorf("setgid %s", err) | ||||
| 	} | ||||
| 	if err := system.Setuid(uid); err != nil { | ||||
| 		return fmt.Errorf("setuid %s", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // setupVethNetwork uses the Network config if it is not nil to initialize | ||||
| // the new veth interface inside the container for use by changing the name to eth0 | ||||
| // setting the MTU and IP address along with the default gateway | ||||
| func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error { | ||||
| 	for _, config := range container.Networks { | ||||
| 		strategy, err := network.GetStrategy(config.Type) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		err1 := strategy.Initialize(config, context) | ||||
| 		if err1 != nil { | ||||
| 			return err1 | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func setupRoute(container *libcontainer.Container) error { | ||||
| 	for _, config := range container.Routes { | ||||
| 		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // FinalizeNamespace drops the caps, sets the correct user | ||||
| // and working dir, and closes any leaky file descriptors | ||||
| // before execing the command inside the namespace | ||||
| func FinalizeNamespace(container *libcontainer.Container) error { | ||||
| 	if err := system.CloseFdsFrom(3); err != nil { | ||||
| 		return fmt.Errorf("close open file descriptors %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// drop capabilities in bounding set before changing user | ||||
| 	if err := capabilities.DropBoundingSet(container); err != nil { | ||||
| 		return fmt.Errorf("drop bounding set %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// preserve existing capabilities while we change users | ||||
| 	if err := system.SetKeepCaps(); err != nil { | ||||
| 		return fmt.Errorf("set keep caps %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := SetupUser(container.User); err != nil { | ||||
| 		return fmt.Errorf("setup user %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := system.ClearKeepCaps(); err != nil { | ||||
| 		return fmt.Errorf("clear keep caps %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// drop all other capabilities | ||||
| 	if err := capabilities.DropCapabilities(container); err != nil { | ||||
| 		return fmt.Errorf("drop capabilities %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if container.WorkingDir != "" { | ||||
| 		if err := system.Chdir(container.WorkingDir); err != nil { | ||||
| 			return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func LoadContainerEnvironment(container *libcontainer.Container) error { | ||||
| 	os.Clearenv() | ||||
| 	for _, pair := range container.Env { | ||||
| 		p := strings.SplitN(pair, "=", 2) | ||||
| 		if len(p) < 2 { | ||||
| 			return fmt.Errorf("invalid environment '%v'", pair) | ||||
| 		} | ||||
| 		if err := os.Setenv(p[0], p[1]); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,143 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| /* | ||||
| #include <dirent.h> | ||||
| #include <errno.h> | ||||
| #include <fcntl.h> | ||||
| #include <linux/sched.h> | ||||
| #include <signal.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <sys/stat.h> | ||||
| #include <sys/types.h> | ||||
| #include <unistd.h> | ||||
| 
 | ||||
// Size of the path buffers and the growth step of the cmdline read buffer.
// The type is spelled out: implicit int is not valid C since C99.
static const int kBufSize = 256;
| 
 | ||||
| void get_args(int *argc, char ***argv) { | ||||
| 	// Read argv | ||||
| 	int fd = open("/proc/self/cmdline", O_RDONLY); | ||||
| 
 | ||||
| 	// Read the whole commandline. | ||||
| 	ssize_t contents_size = 0; | ||||
| 	ssize_t contents_offset = 0; | ||||
| 	char *contents = NULL; | ||||
| 	ssize_t bytes_read = 0; | ||||
| 	do { | ||||
| 		contents_size += kBufSize; | ||||
| 		contents = (char *) realloc(contents, contents_size); | ||||
| 		bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset); | ||||
| 		contents_offset += bytes_read; | ||||
| 	} while (bytes_read > 0); | ||||
| 	close(fd); | ||||
| 
 | ||||
| 	// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args. | ||||
| 	ssize_t i; | ||||
| 	*argc = 0; | ||||
| 	for (i = 0; i < contents_offset; i++) { | ||||
| 		if (contents[i] == '\0') { | ||||
| 			(*argc)++; | ||||
| 		} | ||||
| 	} | ||||
| 	*argv = (char **) malloc(sizeof(char *) * ((*argc) + 1)); | ||||
| 	int idx; | ||||
| 	for (idx = 0; idx < (*argc); idx++) { | ||||
| 		(*argv)[idx] = contents; | ||||
| 		contents += strlen(contents) + 1; | ||||
| 	} | ||||
| 	(*argv)[*argc] = NULL; | ||||
| } | ||||
| 
 | ||||
| void nsenter() { | ||||
| 	int argc; | ||||
| 	char **argv; | ||||
| 	get_args(&argc, &argv); | ||||
| 
 | ||||
| 	// Ignore if this is not for us. | ||||
| 	if (argc < 2 || strcmp(argv[1], "nsenter") != 0) { | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>... | ||||
| 	if (argc < 6) { | ||||
| 		fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n"); | ||||
| 		exit(1); | ||||
| 	} | ||||
| 	pid_t init_pid = strtol(argv[2], NULL, 10); | ||||
| 	if (errno != 0 || init_pid <= 0) { | ||||
| 		fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno)); | ||||
| 		exit(1); | ||||
| 	} | ||||
| 	argc -= 3; | ||||
| 	argv += 3; | ||||
| 
 | ||||
| 	// Setns on all supported namespaces. | ||||
| 	char ns_dir[kBufSize]; | ||||
| 	memset(ns_dir, 0, kBufSize); | ||||
| 	if (snprintf(ns_dir, kBufSize - 1, "/proc/%d/ns/", init_pid) < 0) { | ||||
| 		fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno)); | ||||
| 		exit(1); | ||||
| 	} | ||||
| 	struct dirent *dent; | ||||
| 	DIR *dir = opendir(ns_dir); | ||||
| 	if (dir == NULL) { | ||||
| 		fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno)); | ||||
| 		exit(1); | ||||
| 	} | ||||
| 
 | ||||
| 	while((dent = readdir(dir)) != NULL) { | ||||
| 		if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, "user") == 0) { | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		// Get and open the namespace for the init we are joining.. | ||||
| 		char buf[kBufSize]; | ||||
| 		memset(buf, 0, kBufSize); | ||||
| 		strncat(buf, ns_dir, kBufSize - 1); | ||||
| 		strncat(buf, dent->d_name, kBufSize - 1); | ||||
| 		int fd = open(buf, O_RDONLY); | ||||
| 		if (fd == -1) { | ||||
| 			fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno)); | ||||
| 			exit(1); | ||||
| 		} | ||||
| 
 | ||||
| 		// Set the namespace. | ||||
| 		if (setns(fd, 0) == -1) { | ||||
| 			fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno)); | ||||
| 			exit(1); | ||||
| 		} | ||||
| 		close(fd); | ||||
| 	} | ||||
| 	closedir(dir); | ||||
| 
 | ||||
| 	// We must fork to actually enter the PID namespace. | ||||
| 	int child = fork(); | ||||
| 	if (child == 0) { | ||||
| 		// Finish executing, let the Go runtime take over. | ||||
| 		return; | ||||
| 	} else { | ||||
| 		// Parent, wait for the child. | ||||
| 		int status = 0; | ||||
| 		if (waitpid(child, &status, 0) == -1) { | ||||
| 			fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno)); | ||||
| 			exit(1); | ||||
| 		} | ||||
| 
 | ||||
| 		// Forward the child's exit code or re-send its death signal. | ||||
| 		if (WIFEXITED(status)) { | ||||
| 			exit(WEXITSTATUS(status)); | ||||
| 		} else if (WIFSIGNALED(status)) { | ||||
| 			kill(getpid(), WTERMSIG(status)); | ||||
| 		} | ||||
| 		exit(1); | ||||
| 	} | ||||
| 
 | ||||
| 	return; | ||||
| } | ||||
| 
 | ||||
// init is a constructor so that nsenter() runs when the binary is loaded,
// before the Go runtime starts. The return type is spelled out because
// implicit int is not valid C since C99.
__attribute__((constructor)) void init(void) {
	nsenter();
}
| */ | ||||
| import "C" | ||||
|  | @ -1,28 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| ) | ||||
| 
 | ||||
// WritePid writes the namespaced process's pid to the file "pid" and its
// start time to the file "start", both inside the directory path.
//
// The files are plain data, so they are created with mode 0644 (the previous
// 0655 set execute bits for group and others). If the start time cannot be
// written the pid file is removed again so that no pid is left on disk
// without its matching start time.
func WritePid(path string, pid int, startTime string) error {
	pidFile := filepath.Join(path, "pid")
	if err := ioutil.WriteFile(pidFile, []byte(fmt.Sprint(pid)), 0644); err != nil {
		return err
	}
	if err := ioutil.WriteFile(filepath.Join(path, "start"), []byte(startTime), 0644); err != nil {
		// best effort cleanup; the write error is the one worth reporting
		os.Remove(pidFile)
		return err
	}
	return nil
}
| 
 | ||||
// DeletePid removes the "pid" and "start" files from the directory path.
// Both removals are always attempted; the error from removing "pid" takes
// precedence, otherwise the error from removing "start" is returned.
func DeletePid(path string) error {
	pidErr := os.Remove(filepath.Join(path, "pid"))
	startErr := os.Remove(filepath.Join(path, "start"))
	if pidErr != nil {
		return pidErr
	}
	return startErr
}
|  | @ -1,49 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| ) | ||||
| 
 | ||||
// StdTerminal wires a command's standard streams to plain readers and
// writers through pipes; no tty is involved.
type StdTerminal struct {
	stdin  io.Reader
	stdout io.Writer
	stderr io.Writer
}

// SetMaster is a no-op: there is no master to set on a non tty.
func (s *StdTerminal) SetMaster(*os.File) {
}

// Close has nothing to release and always returns nil.
func (s *StdTerminal) Close() error {
	return nil
}

// Resize is a no-op without a tty and always returns nil.
func (s *StdTerminal) Resize(h, w int) error {
	return nil
}

// Attach creates stdin, stdout and stderr pipes on command and starts one
// goroutine per stream to copy data between the pipes and the terminal's
// reader and writers. The command's stdin pipe is closed once the terminal's
// stdin has been copied. It must be called before the command is started.
func (s *StdTerminal) Attach(command *exec.Cmd) error {
	toChild, err := command.StdinPipe()
	if err != nil {
		return err
	}
	fromChildOut, err := command.StdoutPipe()
	if err != nil {
		return err
	}
	fromChildErr, err := command.StderrPipe()
	if err != nil {
		return err
	}

	go func() {
		defer toChild.Close()
		io.Copy(toChild, s.stdin)
	}()
	go io.Copy(s.stdout, fromChildOut)
	go io.Copy(s.stderr, fromChildErr)

	return nil
}
|  | @ -1,80 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
// SyncPipe allows communication to and from the child process
// to its parent and allows the two independent processes to
// synchronize their state.
//
// NewSyncPipe fills the fields from os.Pipe: parent is the write end and
// child is the read end.
type SyncPipe struct {
	parent, child *os.File
}
| 
 | ||||
| func NewSyncPipe() (s *SyncPipe, err error) { | ||||
| 	s = &SyncPipe{} | ||||
| 	s.child, s.parent, err = os.Pipe() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return s, nil | ||||
| } | ||||
| 
 | ||||
| func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) { | ||||
| 	s := &SyncPipe{} | ||||
| 	if parendFd > 0 { | ||||
| 		s.parent = os.NewFile(parendFd, "parendPipe") | ||||
| 	} else if childFd > 0 { | ||||
| 		s.child = os.NewFile(childFd, "childPipe") | ||||
| 	} else { | ||||
| 		return nil, fmt.Errorf("no valid sync pipe fd specified") | ||||
| 	} | ||||
| 	return s, nil | ||||
| } | ||||
| 
 | ||||
// Child returns the child's side of the pipe; it is nil when that side was
// not set up.
func (s *SyncPipe) Child() *os.File {
	return s.child
}

// Parent returns the parent's side of the pipe; it is nil when that side was
// not set up.
func (s *SyncPipe) Parent() *os.File {
	return s.parent
}
| 
 | ||||
| func (s *SyncPipe) SendToChild(context libcontainer.Context) error { | ||||
| 	data, err := json.Marshal(context) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	s.parent.Write(data) | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) { | ||||
| 	data, err := ioutil.ReadAll(s.child) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("error reading from sync pipe %s", err) | ||||
| 	} | ||||
| 	var context libcontainer.Context | ||||
| 	if len(data) > 0 { | ||||
| 		if err := json.Unmarshal(data, &context); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	return context, nil | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| func (s *SyncPipe) Close() error { | ||||
| 	if s.parent != nil { | ||||
| 		s.parent.Close() | ||||
| 	} | ||||
| 	if s.child != nil { | ||||
| 		s.child.Close() | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,29 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| ) | ||||
| 
 | ||||
// Terminal describes how a command's standard streams are connected to the
// caller. NewTerminal returns either a tty backed or a pipe backed
// implementation.
type Terminal interface {
	io.Closer
	// SetMaster hands over the master file; the tty implementation stores it
	// while the non tty implementation ignores it.
	SetMaster(*os.File)
	// Attach connects the terminal's streams for the given command.
	Attach(*exec.Cmd) error
	// Resize changes the window size to h rows by w columns where supported.
	Resize(h, w int) error
}
| 
 | ||||
| func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal { | ||||
| 	if tty { | ||||
| 		return &TtyTerminal{ | ||||
| 			stdin:  stdin, | ||||
| 			stdout: stdout, | ||||
| 			stderr: stderr, | ||||
| 		} | ||||
| 	} | ||||
| 	return &StdTerminal{ | ||||
| 		stdin:  stdin, | ||||
| 		stdout: stdout, | ||||
| 		stderr: stderr, | ||||
| 	} | ||||
| } | ||||
|  | @ -1,56 +0,0 @@ | |||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/term" | ||||
| ) | ||||
| 
 | ||||
| type TtyTerminal struct { | ||||
| 	stdin          io.Reader | ||||
| 	stdout, stderr io.Writer | ||||
| 	master         *os.File | ||||
| 	state          *term.State | ||||
| } | ||||
| 
 | ||||
| func (t *TtyTerminal) Resize(h, w int) error { | ||||
| 	return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) | ||||
| } | ||||
| 
 | ||||
| func (t *TtyTerminal) SetMaster(master *os.File) { | ||||
| 	t.master = master | ||||
| } | ||||
| 
 | ||||
| func (t *TtyTerminal) Attach(command *exec.Cmd) error { | ||||
| 	go io.Copy(t.stdout, t.master) | ||||
| 	go io.Copy(t.master, t.stdin) | ||||
| 
 | ||||
| 	state, err := t.setupWindow(t.master, os.Stdin) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	t.state = state | ||||
| 	return err | ||||
| } | ||||
| 
 | ||||
| // SetupWindow gets the parent window size and sets the master | ||||
| // pty to the current size and set the parents mode to RAW | ||||
| func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) { | ||||
| 	ws, err := term.GetWinsize(parent.Fd()) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if err := term.SetWinsize(master.Fd(), ws); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return term.SetRawTerminal(parent.Fd()) | ||||
| } | ||||
| 
 | ||||
| func (t *TtyTerminal) Close() error { | ||||
| 	term.RestoreTerminal(os.Stdin.Fd(), t.state) | ||||
| 	return t.master.Close() | ||||
| } | ||||
|  | @ -1,28 +0,0 @@ | |||
| // +build !linux | ||||
| 
 | ||||
| package namespaces | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups" | ||||
| ) | ||||
| 
 | ||||
// Exec is not available on this platform (see the !linux build tag) and
// always returns -1 and libcontainer.ErrUnsupported.
func Exec(container *libcontainer.Container, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
	return -1, libcontainer.ErrUnsupported
}

// Init is not available on this platform and always returns
// libcontainer.ErrUnsupported.
func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
	return libcontainer.ErrUnsupported
}

// InitializeNetworking is not available on this platform and always returns
// libcontainer.ErrUnsupported.
func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {
	return libcontainer.ErrUnsupported
}

// SetupCgroups is not available on this platform and always returns a nil
// cgroup and libcontainer.ErrUnsupported.
func SetupCgroups(container *libcontainer.Container, nspid int) (cgroups.ActiveCgroup, error) {
	return nil, libcontainer.ErrUnsupported
}

// GetNamespaceFlags always returns 0 on this platform.
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
	return 0
}
|  | @ -1,2 +0,0 @@ | |||
| Michael Crosby <michael@crosbymichael.com> (@crosbymichael) | ||||
| Guillaume J. Charmes <guillaume@docker.com> (@creack) | ||||
|  | @ -1,23 +0,0 @@ | |||
| // Package netlink provides access to low level Netlink sockets and messages. | ||||
| // | ||||
| // Actual implementations are in: | ||||
| // netlink_linux.go | ||||
| // netlink_darwin.go | ||||
| package netlink | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"net" | ||||
| ) | ||||
| 
 | ||||
// Errors shared by the netlink implementations.
var (
	// ErrWrongSockType: NOTE(review) presumably returned when a socket address
	// is not a netlink address - confirm against the implementation.
	ErrWrongSockType = errors.New("Wrong socket type")
	// ErrShortResponse reports a netlink response that was too short.
	ErrShortResponse = errors.New("Got short response from netlink")
)

// A Route is a subnet associated with the interface to reach it.
type Route struct {
	// IPNet is the embedded subnet of the route.
	*net.IPNet
	// Iface is the interface used to reach the subnet.
	Iface *net.Interface
	// Default: NOTE(review) presumably marks the default route - confirm.
	Default bool
}
|  | @ -1,964 +0,0 @@ | |||
| // +build amd64 | ||||
| 
 | ||||
| package netlink | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"fmt" | ||||
| 	"math/rand" | ||||
| 	"net" | ||||
| 	"sync/atomic" | ||||
| 	"syscall" | ||||
| 	"unsafe" | ||||
| ) | ||||
| 
 | ||||
// Constants used when building netlink and ioctl requests.
// NOTE(review): the names mirror Linux kernel header constants; the values
// were not checked against the headers here - confirm before changing.
const (
	IFNAMSIZ       = 16 // matches the [16]byte name fields of the ifreq structs below
	DEFAULT_CHANGE = 0xFFFFFFFF
	IFLA_INFO_KIND = 1
	IFLA_INFO_DATA = 2
	VETH_INFO_PEER = 1
	IFLA_NET_NS_FD = 28
	SIOC_BRADDBR   = 0x89a0
	SIOC_BRADDIF   = 0x89a2
)

// nextSeqNr: NOTE(review) presumably the sequence number source for outgoing
// netlink requests; sync/atomic is imported by this file - confirm usage.
var nextSeqNr uint32

// ifreqHwaddr pairs an interface name with a hardware address.
// NOTE(review): looks like the ifreq layout for a hardware address ioctl - confirm.
type ifreqHwaddr struct {
	IfrnName   [16]byte
	IfruHwaddr syscall.RawSockaddr
}

// ifreqIndex pairs an interface name with an interface index.
// NOTE(review): looks like the ifreq layout for an index based ioctl - confirm.
type ifreqIndex struct {
	IfrnName  [16]byte
	IfruIndex int32
}
| 
 | ||||
// nativeEndian reports the byte order of the machine by looking at which
// byte of a known 32 bit value is stored first in memory.
func nativeEndian() binary.ByteOrder {
	probe := uint32(0x01020304)
	first := *(*byte)(unsafe.Pointer(&probe))
	if first != 0x01 {
		return binary.LittleEndian
	}
	// the most significant byte comes first
	return binary.BigEndian
}
| 
 | ||||
// getIpFamily returns syscall.AF_INET for addresses that are at most four
// bytes long or convertible to IPv4, and syscall.AF_INET6 for everything else.
func getIpFamily(ip net.IP) int {
	isV4 := len(ip) <= net.IPv4len || ip.To4() != nil
	if !isV4 {
		return syscall.AF_INET6
	}
	return syscall.AF_INET
}
| 
 | ||||
// NetlinkRequestData is a piece of a netlink request that knows its length
// and how to serialize itself.
type NetlinkRequestData interface {
	// Len returns the length of the data in bytes.
	Len() int
	// ToWireFormat returns the serialized bytes of the data.
	ToWireFormat() []byte
}
| 
 | ||||
| type IfInfomsg struct { | ||||
| 	syscall.IfInfomsg | ||||
| } | ||||
| 
 | ||||
| func newIfInfomsg(family int) *IfInfomsg { | ||||
| 	return &IfInfomsg{ | ||||
| 		IfInfomsg: syscall.IfInfomsg{ | ||||
| 			Family: uint8(family), | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { | ||||
| 	msg := newIfInfomsg(family) | ||||
| 	parent.children = append(parent.children, msg) | ||||
| 	return msg | ||||
| } | ||||
| 
 | ||||
| func (msg *IfInfomsg) ToWireFormat() []byte { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	length := syscall.SizeofIfInfomsg | ||||
| 	b := make([]byte, length) | ||||
| 	b[0] = msg.Family | ||||
| 	b[1] = 0 | ||||
| 	native.PutUint16(b[2:4], msg.Type) | ||||
| 	native.PutUint32(b[4:8], uint32(msg.Index)) | ||||
| 	native.PutUint32(b[8:12], msg.Flags) | ||||
| 	native.PutUint32(b[12:16], msg.Change) | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
| func (msg *IfInfomsg) Len() int { | ||||
| 	return syscall.SizeofIfInfomsg | ||||
| } | ||||
| 
 | ||||
| type IfAddrmsg struct { | ||||
| 	syscall.IfAddrmsg | ||||
| } | ||||
| 
 | ||||
| func newIfAddrmsg(family int) *IfAddrmsg { | ||||
| 	return &IfAddrmsg{ | ||||
| 		IfAddrmsg: syscall.IfAddrmsg{ | ||||
| 			Family: uint8(family), | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (msg *IfAddrmsg) ToWireFormat() []byte { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	length := syscall.SizeofIfAddrmsg | ||||
| 	b := make([]byte, length) | ||||
| 	b[0] = msg.Family | ||||
| 	b[1] = msg.Prefixlen | ||||
| 	b[2] = msg.Flags | ||||
| 	b[3] = msg.Scope | ||||
| 	native.PutUint32(b[4:8], msg.Index) | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
| func (msg *IfAddrmsg) Len() int { | ||||
| 	return syscall.SizeofIfAddrmsg | ||||
| } | ||||
| 
 | ||||
| type RtMsg struct { | ||||
| 	syscall.RtMsg | ||||
| } | ||||
| 
 | ||||
| func newRtMsg() *RtMsg { | ||||
| 	return &RtMsg{ | ||||
| 		RtMsg: syscall.RtMsg{ | ||||
| 			Table:    syscall.RT_TABLE_MAIN, | ||||
| 			Scope:    syscall.RT_SCOPE_UNIVERSE, | ||||
| 			Protocol: syscall.RTPROT_BOOT, | ||||
| 			Type:     syscall.RTN_UNICAST, | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (msg *RtMsg) ToWireFormat() []byte { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	length := syscall.SizeofRtMsg | ||||
| 	b := make([]byte, length) | ||||
| 	b[0] = msg.Family | ||||
| 	b[1] = msg.Dst_len | ||||
| 	b[2] = msg.Src_len | ||||
| 	b[3] = msg.Tos | ||||
| 	b[4] = msg.Table | ||||
| 	b[5] = msg.Protocol | ||||
| 	b[6] = msg.Scope | ||||
| 	b[7] = msg.Type | ||||
| 	native.PutUint32(b[8:12], msg.Flags) | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
// Len reports the serialized size of the message, which is always
// syscall.SizeofRtMsg.
| func (msg *RtMsg) Len() int { | ||||
| 	return syscall.SizeofRtMsg | ||||
| } | ||||
| 
 | ||||
// rtaAlignOf rounds attrlen up to the next multiple of syscall.RTA_ALIGNTO.
// Values that are already aligned are returned unchanged.
func rtaAlignOf(attrlen int) int {
	const mask = syscall.RTA_ALIGNTO - 1
	return (attrlen + mask) &^ mask
}
| 
 | ||||
// RtAttr is a route attribute: the embedded syscall.RtAttr header plus
// either a raw payload (Data) or a list of nested items (children).
// ToWireFormat serializes Data when it is non-nil and the children otherwise.
| type RtAttr struct { | ||||
| 	syscall.RtAttr | ||||
| 	Data     []byte | ||||
| 	children []NetlinkRequestData | ||||
| } | ||||
| 
 | ||||
| func newRtAttr(attrType int, data []byte) *RtAttr { | ||||
| 	return &RtAttr{ | ||||
| 		RtAttr: syscall.RtAttr{ | ||||
| 			Type: uint16(attrType), | ||||
| 		}, | ||||
| 		children: []NetlinkRequestData{}, | ||||
| 		Data:     data, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { | ||||
| 	attr := newRtAttr(attrType, data) | ||||
| 	parent.children = append(parent.children, attr) | ||||
| 	return attr | ||||
| } | ||||
| 
 | ||||
// Len returns the aligned payload length of the attribute. It sums, for
// every child, the child's own Len plus syscall.SizeofRtAttr, adds len(Data),
// and rounds the total up with rtaAlignOf.
| func (a *RtAttr) Len() int { | ||||
| 	l := 0 | ||||
| 	for _, child := range a.children { | ||||
| 		l += child.Len() + syscall.SizeofRtAttr | ||||
| 	} | ||||
	// With no children the count starts at 1 rather than 0.
	// NOTE(review): presumably this pairs with the "+1" written by
	// ToWireFormat; confirm before changing either side.
| 	if l == 0 { | ||||
| 		l++ | ||||
| 	} | ||||
| 	return rtaAlignOf(l + len(a.Data)) | ||||
| } | ||||
| 
 | ||||
// ToWireFormat serializes the attribute into a new buffer. Bytes 0-1 hold
// the length field, bytes 2-3 the attribute type, and the payload starts at
// offset 4. The payload is Data when Data is non-nil; otherwise it is the
// serialized children laid out back to back at aligned offsets.
| func (a *RtAttr) ToWireFormat() []byte { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	length := a.Len() | ||||
	// Room for the payload plus the attribute header, rounded up.
| 	buf := make([]byte, rtaAlignOf(length+syscall.SizeofRtAttr)) | ||||
| 
 | ||||
| 	if a.Data != nil { | ||||
| 		copy(buf[4:], a.Data) | ||||
| 	} else { | ||||
| 		next := 4 | ||||
| 		for _, child := range a.children { | ||||
| 			childBuf := child.ToWireFormat() | ||||
| 			copy(buf[next:], childBuf) | ||||
| 			next += rtaAlignOf(len(childBuf)) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
	// The length field is written as the aligned payload length plus one,
	// and only when that aligned length is non-zero.
	// NOTE(review): the reason for the "+1" is not visible here; confirm
	// against the netlink attribute layout before touching it.
| 	if l := uint16(rtaAlignOf(length)); l != 0 { | ||||
| 		native.PutUint16(buf[0:2], l+1) | ||||
| 	} | ||||
| 	native.PutUint16(buf[2:4], a.Type) | ||||
| 
 | ||||
| 	return buf | ||||
| } | ||||
| 
 | ||||
// NetlinkRequest is a netlink message: the embedded syscall.NlMsghdr header
// followed by the payload items in Data, serialized in order by ToWireFormat.
| type NetlinkRequest struct { | ||||
| 	syscall.NlMsghdr | ||||
| 	Data []NetlinkRequestData | ||||
| } | ||||
| 
 | ||||
| func (rr *NetlinkRequest) ToWireFormat() []byte { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	length := rr.Len | ||||
| 	dataBytes := make([][]byte, len(rr.Data)) | ||||
| 	for i, data := range rr.Data { | ||||
| 		dataBytes[i] = data.ToWireFormat() | ||||
| 		length += uint32(len(dataBytes[i])) | ||||
| 	} | ||||
| 	b := make([]byte, length) | ||||
| 	native.PutUint32(b[0:4], length) | ||||
| 	native.PutUint16(b[4:6], rr.Type) | ||||
| 	native.PutUint16(b[6:8], rr.Flags) | ||||
| 	native.PutUint32(b[8:12], rr.Seq) | ||||
| 	native.PutUint32(b[12:16], rr.Pid) | ||||
| 
 | ||||
| 	next := 16 | ||||
| 	for _, data := range dataBytes { | ||||
| 		copy(b[next:], data) | ||||
| 		next += len(data) | ||||
| 	} | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
| func (rr *NetlinkRequest) AddData(data NetlinkRequestData) { | ||||
| 	if data != nil { | ||||
| 		rr.Data = append(rr.Data, data) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newNetlinkRequest(proto, flags int) *NetlinkRequest { | ||||
| 	return &NetlinkRequest{ | ||||
| 		NlMsghdr: syscall.NlMsghdr{ | ||||
| 			Len:   uint32(syscall.NLMSG_HDRLEN), | ||||
| 			Type:  uint16(proto), | ||||
| 			Flags: syscall.NLM_F_REQUEST | uint16(flags), | ||||
| 			Seq:   atomic.AddUint32(&nextSeqNr, 1), | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// NetlinkSocket holds the file descriptor of a netlink socket together with
// the netlink address it is bound to, which Send also uses as destination.
| type NetlinkSocket struct { | ||||
| 	fd  int | ||||
| 	lsa syscall.SockaddrNetlink | ||||
| } | ||||
| 
 | ||||
| func getNetlinkSocket() (*NetlinkSocket, error) { | ||||
| 	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	s := &NetlinkSocket{ | ||||
| 		fd: fd, | ||||
| 	} | ||||
| 	s.lsa.Family = syscall.AF_NETLINK | ||||
| 	if err := syscall.Bind(fd, &s.lsa); err != nil { | ||||
| 		syscall.Close(fd) | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return s, nil | ||||
| } | ||||
| 
 | ||||
// Close closes the socket's file descriptor. Any error reported by
// syscall.Close is discarded.
| func (s *NetlinkSocket) Close() { | ||||
| 	syscall.Close(s.fd) | ||||
| } | ||||
| 
 | ||||
| func (s *NetlinkSocket) Send(request *NetlinkRequest) error { | ||||
| 	if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { | ||||
| 	rb := make([]byte, syscall.Getpagesize()) | ||||
| 	nr, _, err := syscall.Recvfrom(s.fd, rb, 0) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if nr < syscall.NLMSG_HDRLEN { | ||||
| 		return nil, ErrShortResponse | ||||
| 	} | ||||
| 	rb = rb[:nr] | ||||
| 	return syscall.ParseNetlinkMessage(rb) | ||||
| } | ||||
| 
 | ||||
| func (s *NetlinkSocket) GetPid() (uint32, error) { | ||||
| 	lsa, err := syscall.Getsockname(s.fd) | ||||
| 	if err != nil { | ||||
| 		return 0, err | ||||
| 	} | ||||
| 	switch v := lsa.(type) { | ||||
| 	case *syscall.SockaddrNetlink: | ||||
| 		return v.Pid, nil | ||||
| 	} | ||||
| 	return 0, ErrWrongSockType | ||||
| } | ||||
| 
 | ||||
| func (s *NetlinkSocket) HandleAck(seq uint32) error { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	pid, err := s.GetPid() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| done: | ||||
| 	for { | ||||
| 		msgs, err := s.Receive() | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		for _, m := range msgs { | ||||
| 			if m.Header.Seq != seq { | ||||
| 				return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, seq) | ||||
| 			} | ||||
| 			if m.Header.Pid != pid { | ||||
| 				return fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid) | ||||
| 			} | ||||
| 			if m.Header.Type == syscall.NLMSG_DONE { | ||||
| 				break done | ||||
| 			} | ||||
| 			if m.Header.Type == syscall.NLMSG_ERROR { | ||||
| 				error := int32(native.Uint32(m.Data[0:4])) | ||||
| 				if error == 0 { | ||||
| 					break done | ||||
| 				} | ||||
| 				return syscall.Errno(-error) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Add a new route table entry. | ||||
| func AddRoute(destination, source, gateway, device string) error { | ||||
| 	if destination == "" && source == "" && gateway == "" { | ||||
| 		return fmt.Errorf("one of destination, source or gateway must not be blank") | ||||
| 	} | ||||
| 
 | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) | ||||
| 	msg := newRtMsg() | ||||
| 	currentFamily := -1 | ||||
| 	var rtAttrs []*RtAttr | ||||
| 
 | ||||
| 	if destination != "" { | ||||
| 		destIP, destNet, err := net.ParseCIDR(destination) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("destination CIDR %s couldn't be parsed", destination) | ||||
| 		} | ||||
| 		destFamily := getIpFamily(destIP) | ||||
| 		currentFamily = destFamily | ||||
| 		destLen, bits := destNet.Mask.Size() | ||||
| 		if destLen == 0 && bits == 0 { | ||||
| 			return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination) | ||||
| 		} | ||||
| 		msg.Family = uint8(destFamily) | ||||
| 		msg.Dst_len = uint8(destLen) | ||||
| 		var destData []byte | ||||
| 		if destFamily == syscall.AF_INET { | ||||
| 			destData = destIP.To4() | ||||
| 		} else { | ||||
| 			destData = destIP.To16() | ||||
| 		} | ||||
| 		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData)) | ||||
| 	} | ||||
| 
 | ||||
| 	if source != "" { | ||||
| 		srcIP, srcNet, err := net.ParseCIDR(source) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("source CIDR %s couldn't be parsed", source) | ||||
| 		} | ||||
| 		srcFamily := getIpFamily(srcIP) | ||||
| 		if currentFamily != -1 && currentFamily != srcFamily { | ||||
| 			return fmt.Errorf("source and destination ip were not the same IP family") | ||||
| 		} | ||||
| 		currentFamily = srcFamily | ||||
| 		srcLen, bits := srcNet.Mask.Size() | ||||
| 		if srcLen == 0 && bits == 0 { | ||||
| 			return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source) | ||||
| 		} | ||||
| 		msg.Family = uint8(srcFamily) | ||||
| 		msg.Src_len = uint8(srcLen) | ||||
| 		var srcData []byte | ||||
| 		if srcFamily == syscall.AF_INET { | ||||
| 			srcData = srcIP.To4() | ||||
| 		} else { | ||||
| 			srcData = srcIP.To16() | ||||
| 		} | ||||
| 		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData)) | ||||
| 	} | ||||
| 
 | ||||
| 	if gateway != "" { | ||||
| 		gwIP := net.ParseIP(gateway) | ||||
| 		if gwIP == nil { | ||||
| 			return fmt.Errorf("gateway IP %s couldn't be parsed", gateway) | ||||
| 		} | ||||
| 		gwFamily := getIpFamily(gwIP) | ||||
| 		if currentFamily != -1 && currentFamily != gwFamily { | ||||
| 			return fmt.Errorf("gateway, source, and destination ip were not the same IP family") | ||||
| 		} | ||||
| 		msg.Family = uint8(gwFamily) | ||||
| 		var gwData []byte | ||||
| 		if gwFamily == syscall.AF_INET { | ||||
| 			gwData = gwIP.To4() | ||||
| 		} else { | ||||
| 			gwData = gwIP.To16() | ||||
| 		} | ||||
| 		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData)) | ||||
| 	} | ||||
| 
 | ||||
| 	wb.AddData(msg) | ||||
| 	for _, attr := range rtAttrs { | ||||
| 		wb.AddData(attr) | ||||
| 	} | ||||
| 
 | ||||
| 	var ( | ||||
| 		native = nativeEndian() | ||||
| 		b      = make([]byte, 4) | ||||
| 	) | ||||
| 	iface, err := net.InterfaceByName(device) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	native.PutUint32(b, uint32(iface.Index)) | ||||
| 
 | ||||
| 	wb.AddData(newRtAttr(syscall.RTA_OIF, b)) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
// AddDefaultGw delegates to AddRoute with an empty destination and source,
// passing ip as the gateway and device as the output interface.
| // Add a new default gateway. Identical to: | ||||
| // ip route add default via $ip | ||||
| func AddDefaultGw(ip, device string) error { | ||||
| 	return AddRoute("", "", ip, device) | ||||
| } | ||||
| 
 | ||||
| // Bring up a particular network interface | ||||
| func NetworkLinkUp(iface *net.Interface) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Change = syscall.IFF_UP | ||||
| 	msg.Flags = syscall.IFF_UP | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| func NetworkLinkDown(iface *net.Interface) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Change = syscall.IFF_UP | ||||
| 	msg.Flags = 0 & ^syscall.IFF_UP | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| func NetworkSetMTU(iface *net.Interface, mtu int) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Type = syscall.RTM_SETLINK | ||||
| 	msg.Flags = syscall.NLM_F_REQUEST | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	msg.Change = DEFAULT_CHANGE | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	var ( | ||||
| 		b      = make([]byte, 4) | ||||
| 		native = nativeEndian() | ||||
| 	) | ||||
| 	native.PutUint32(b, uint32(mtu)) | ||||
| 
 | ||||
| 	data := newRtAttr(syscall.IFLA_MTU, b) | ||||
| 	wb.AddData(data) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| // same as ip link set $name master $master | ||||
| func NetworkSetMaster(iface, master *net.Interface) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Type = syscall.RTM_SETLINK | ||||
| 	msg.Flags = syscall.NLM_F_REQUEST | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	msg.Change = DEFAULT_CHANGE | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	var ( | ||||
| 		b      = make([]byte, 4) | ||||
| 		native = nativeEndian() | ||||
| 	) | ||||
| 	native.PutUint32(b, uint32(master.Index)) | ||||
| 
 | ||||
| 	data := newRtAttr(syscall.IFLA_MASTER, b) | ||||
| 	wb.AddData(data) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| func NetworkSetNsPid(iface *net.Interface, nspid int) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Type = syscall.RTM_SETLINK | ||||
| 	msg.Flags = syscall.NLM_F_REQUEST | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	msg.Change = DEFAULT_CHANGE | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	var ( | ||||
| 		b      = make([]byte, 4) | ||||
| 		native = nativeEndian() | ||||
| 	) | ||||
| 	native.PutUint32(b, uint32(nspid)) | ||||
| 
 | ||||
| 	data := newRtAttr(syscall.IFLA_NET_NS_PID, b) | ||||
| 	wb.AddData(data) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| func NetworkSetNsFd(iface *net.Interface, fd int) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	msg.Type = syscall.RTM_SETLINK | ||||
| 	msg.Flags = syscall.NLM_F_REQUEST | ||||
| 	msg.Index = int32(iface.Index) | ||||
| 	msg.Change = DEFAULT_CHANGE | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	var ( | ||||
| 		b      = make([]byte, 4) | ||||
| 		native = nativeEndian() | ||||
| 	) | ||||
| 	native.PutUint32(b, uint32(fd)) | ||||
| 
 | ||||
| 	data := newRtAttr(IFLA_NET_NS_FD, b) | ||||
| 	wb.AddData(data) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| // Add an Ip address to an interface. This is identical to: | ||||
| // ip addr add $ip/$ipNet dev $iface | ||||
| func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	family := getIpFamily(ip) | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfAddrmsg(family) | ||||
| 	msg.Index = uint32(iface.Index) | ||||
| 	prefixLen, _ := ipNet.Mask.Size() | ||||
| 	msg.Prefixlen = uint8(prefixLen) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	var ipData []byte | ||||
| 	if family == syscall.AF_INET { | ||||
| 		ipData = ip.To4() | ||||
| 	} else { | ||||
| 		ipData = ip.To16() | ||||
| 	} | ||||
| 
 | ||||
| 	localData := newRtAttr(syscall.IFA_LOCAL, ipData) | ||||
| 	wb.AddData(localData) | ||||
| 
 | ||||
| 	addrData := newRtAttr(syscall.IFA_ADDRESS, ipData) | ||||
| 	wb.AddData(addrData) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
// zeroTerminated returns the bytes of s followed by a single NUL byte.
func zeroTerminated(s string) []byte {
	buf := make([]byte, len(s)+1)
	copy(buf, s)
	return buf
}
| 
 | ||||
// nonZeroTerminated returns the bytes of s as-is, with no trailing NUL byte
// (the counterpart of zeroTerminated).
| func nonZeroTerminated(s string) []byte { | ||||
| 	return []byte(s) | ||||
| } | ||||
| 
 | ||||
| // Add a new network link of a specified type. This is identical to | ||||
| // running: ip add link $name type $linkType | ||||
| func NetworkLinkAdd(name string, linkType string) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	if name != "" { | ||||
| 		nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name)) | ||||
| 		wb.AddData(nameData) | ||||
| 	} | ||||
| 
 | ||||
| 	kindData := newRtAttr(IFLA_INFO_KIND, nonZeroTerminated(linkType)) | ||||
| 
 | ||||
| 	infoData := newRtAttr(syscall.IFLA_LINKINFO, kindData.ToWireFormat()) | ||||
| 	wb.AddData(infoData) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| // Returns an array of IPNet for all the currently routed subnets on ipv4 | ||||
| // This is similar to the first column of "ip route" output | ||||
| func NetworkGetRoutes() ([]Route, error) { | ||||
| 	native := nativeEndian() | ||||
| 
 | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	pid, err := s.GetPid() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	res := make([]Route, 0) | ||||
| 
 | ||||
| done: | ||||
| 	for { | ||||
| 		msgs, err := s.Receive() | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		for _, m := range msgs { | ||||
| 			if m.Header.Seq != wb.Seq { | ||||
| 				return nil, fmt.Errorf("Wrong Seq nr %d, expected 1", m.Header.Seq) | ||||
| 			} | ||||
| 			if m.Header.Pid != pid { | ||||
| 				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid) | ||||
| 			} | ||||
| 			if m.Header.Type == syscall.NLMSG_DONE { | ||||
| 				break done | ||||
| 			} | ||||
| 			if m.Header.Type == syscall.NLMSG_ERROR { | ||||
| 				error := int32(native.Uint32(m.Data[0:4])) | ||||
| 				if error == 0 { | ||||
| 					break done | ||||
| 				} | ||||
| 				return nil, syscall.Errno(-error) | ||||
| 			} | ||||
| 			if m.Header.Type != syscall.RTM_NEWROUTE { | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| 			var r Route | ||||
| 
 | ||||
| 			msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0])) | ||||
| 
 | ||||
| 			if msg.Flags&syscall.RTM_F_CLONED != 0 { | ||||
| 				// Ignore cloned routes | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| 			if msg.Table != syscall.RT_TABLE_MAIN { | ||||
| 				// Ignore non-main tables | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| 			if msg.Family != syscall.AF_INET { | ||||
| 				// Ignore non-ipv4 routes | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| 			if msg.Dst_len == 0 { | ||||
| 				// Default routes | ||||
| 				r.Default = true | ||||
| 			} | ||||
| 
 | ||||
| 			attrs, err := syscall.ParseNetlinkRouteAttr(&m) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 			for _, attr := range attrs { | ||||
| 				switch attr.Attr.Type { | ||||
| 				case syscall.RTA_DST: | ||||
| 					ip := attr.Value | ||||
| 					r.IPNet = &net.IPNet{ | ||||
| 						IP:   ip, | ||||
| 						Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)), | ||||
| 					} | ||||
| 				case syscall.RTA_OIF: | ||||
| 					index := int(native.Uint32(attr.Value[0:4])) | ||||
| 					r.Iface, _ = net.InterfaceByIndex(index) | ||||
| 				} | ||||
| 			} | ||||
| 			if r.Default || r.IPNet != nil { | ||||
| 				res = append(res, r) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return res, nil | ||||
| } | ||||
| 
 | ||||
// getIfSocket opens a datagram socket for interface ioctls. It tries
// AF_INET, AF_PACKET and AF_INET6 in that order and returns the first
// descriptor that can be created. If every attempt fails it returns -1 and
// the error from the last attempt.
func getIfSocket() (fd int, err error) {
	families := [...]int{
		syscall.AF_INET,
		syscall.AF_PACKET,
		syscall.AF_INET6,
	}
	for _, family := range families {
		fd, err = syscall.Socket(family, syscall.SOCK_DGRAM, 0)
		if err == nil {
			return fd, nil
		}
	}
	return -1, err
}
| 
 | ||||
| func NetworkChangeName(iface *net.Interface, newName string) error { | ||||
| 	fd, err := getIfSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer syscall.Close(fd) | ||||
| 
 | ||||
| 	data := [IFNAMSIZ * 2]byte{} | ||||
| 	// the "-1"s here are very important for ensuring we get proper null | ||||
| 	// termination of our new C strings | ||||
| 	copy(data[:IFNAMSIZ-1], iface.Name) | ||||
| 	copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName) | ||||
| 
 | ||||
| 	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 { | ||||
| 		return errno | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func NetworkCreateVethPair(name1, name2 string) error { | ||||
| 	s, err := getNetlinkSocket() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer s.Close() | ||||
| 
 | ||||
| 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) | ||||
| 
 | ||||
| 	msg := newIfInfomsg(syscall.AF_UNSPEC) | ||||
| 	wb.AddData(msg) | ||||
| 
 | ||||
| 	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1)) | ||||
| 	wb.AddData(nameData) | ||||
| 
 | ||||
| 	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) | ||||
| 	newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth")) | ||||
| 	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) | ||||
| 	nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil) | ||||
| 
 | ||||
| 	newIfInfomsgChild(nest3, syscall.AF_UNSPEC) | ||||
| 	newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2)) | ||||
| 
 | ||||
| 	wb.AddData(nest1) | ||||
| 
 | ||||
| 	if err := s.Send(wb); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return s.HandleAck(wb.Seq) | ||||
| } | ||||
| 
 | ||||
| // Create the actual bridge device.  This is more backward-compatible than | ||||
| // netlink.NetworkLinkAdd and works on RHEL 6. | ||||
| func CreateBridge(name string, setMacAddr bool) error { | ||||
| 	s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP) | ||||
| 	if err != nil { | ||||
| 		// ipv6 issue, creating with ipv4 | ||||
| 		s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	defer syscall.Close(s) | ||||
| 
 | ||||
| 	nameBytePtr, err := syscall.BytePtrFromString(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { | ||||
| 		return err | ||||
| 	} | ||||
| 	if setMacAddr { | ||||
| 		return setBridgeMacAddress(s, name) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Add a slave to abridge device.  This is more backward-compatible than | ||||
| // netlink.NetworkSetMaster and works on RHEL 6. | ||||
| func AddToBridge(iface, master *net.Interface) error { | ||||
| 	s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP) | ||||
| 	if err != nil { | ||||
| 		// ipv6 issue, creating with ipv4 | ||||
| 		s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	defer syscall.Close(s) | ||||
| 
 | ||||
| 	ifr := ifreqIndex{} | ||||
| 	copy(ifr.IfrnName[:], master.Name) | ||||
| 	ifr.IfruIndex = int32(iface.Index) | ||||
| 
 | ||||
| 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDIF, uintptr(unsafe.Pointer(&ifr))); err != 0 { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func setBridgeMacAddress(s int, name string) error { | ||||
| 	ifr := ifreqHwaddr{} | ||||
| 	ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER | ||||
| 	copy(ifr.IfrnName[:], name) | ||||
| 
 | ||||
| 	for i := 0; i < 6; i++ { | ||||
| 		ifr.IfruHwaddr.Data[i] = int8(rand.Intn(255)) | ||||
| 	} | ||||
| 
 | ||||
| 	ifr.IfruHwaddr.Data[0] &^= 0x1 // clear multicast bit | ||||
| 	ifr.IfruHwaddr.Data[0] |= 0x2  // set local assignment bit (IEEE802) | ||||
| 
 | ||||
| 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,72 +0,0 @@ | |||
| // +build !linux !amd64 | ||||
| 
 | ||||
| package netlink | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"net" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	ErrNotImplemented = errors.New("not implemented") | ||||
| ) | ||||
| 
 | ||||
// NetworkGetRoutes is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkGetRoutes() ([]Route, error) { | ||||
| 	return nil, ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkLinkAdd is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkLinkAdd(name string, linkType string) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkLinkUp is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkLinkUp(iface *net.Interface) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkLinkAddIp is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// AddRoute is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func AddRoute(destination, source, gateway, device string) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// AddDefaultGw is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func AddDefaultGw(ip, device string) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkSetMTU is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkSetMTU(iface *net.Interface, mtu int) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkCreateVethPair is the stub for platforms excluded by this file's
// build constraint; it always returns ErrNotImplemented.
| func NetworkCreateVethPair(name1, name2 string) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkChangeName is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkChangeName(iface *net.Interface, newName string) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkSetNsFd is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkSetNsFd(iface *net.Interface, fd int) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkSetNsPid is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkSetNsPid(iface *net.Interface, nspid int) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkSetMaster is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkSetMaster(iface, master *net.Interface) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// NetworkLinkDown is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func NetworkLinkDown(iface *net.Interface) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// CreateBridge is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func CreateBridge(name string, setMacAddr bool) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
// AddToBridge is the stub for platforms excluded by this file's build
// constraint; it always returns ErrNotImplemented.
| func AddToBridge(iface, master *net.Interface) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
|  | @ -1,24 +0,0 @@ | |||
| package network | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
| // Loopback is a network strategy that provides a basic loopback device | ||||
// The type carries no state: Create is a no-op and Initialize configures the
// "lo" interface.
| type Loopback struct { | ||||
| } | ||||
| 
 | ||||
// Create does nothing for the loopback strategy and always returns nil.
| func (l *Loopback) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (l *Loopback) Initialize(config *libcontainer.Network, context libcontainer.Context) error { | ||||
| 	if err := SetMtu("lo", config.Mtu); err != nil { | ||||
| 		return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) | ||||
| 	} | ||||
| 	if err := InterfaceUp("lo"); err != nil { | ||||
| 		return fmt.Errorf("lo up %s", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,34 +0,0 @@ | |||
| package network | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
// NetNS is a network strategy that joins an existing network namespace: its
// Initialize opens the path stored under the "nspath" context key and calls
// setns on it.
| //  crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace | ||||
| type NetNS struct { | ||||
| } | ||||
| 
 | ||||
// Create copies the "nspath" entry from the network's own Context into the
// supplied context and returns nil. nspid is not used.
| func (v *NetNS) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { | ||||
| 	context["nspath"] = n.Context["nspath"] | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (v *NetNS) Initialize(config *libcontainer.Network, context libcontainer.Context) error { | ||||
| 	nspath, exists := context["nspath"] | ||||
| 	if !exists { | ||||
| 		return fmt.Errorf("nspath does not exist in network context") | ||||
| 	} | ||||
| 	f, err := os.OpenFile(nspath, os.O_RDONLY, 0) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("failed get network namespace fd: %v", err) | ||||
| 	} | ||||
| 	if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil { | ||||
| 		return fmt.Errorf("failed to setns current network namespace: %v", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,78 +0,0 @@ | |||
| package network | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/netlink" | ||||
| 	"net" | ||||
| ) | ||||
| 
 | ||||
| func InterfaceUp(name string) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkLinkUp(iface) | ||||
| } | ||||
| 
 | ||||
| func InterfaceDown(name string) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkLinkDown(iface) | ||||
| } | ||||
| 
 | ||||
| func ChangeInterfaceName(old, newName string) error { | ||||
| 	iface, err := net.InterfaceByName(old) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkChangeName(iface, newName) | ||||
| } | ||||
| 
 | ||||
// CreateVethPair is a thin wrapper around netlink.NetworkCreateVethPair,
// passing name1 and name2 through unchanged.
| func CreateVethPair(name1, name2 string) error { | ||||
| 	return netlink.NetworkCreateVethPair(name1, name2) | ||||
| } | ||||
| 
 | ||||
| func SetInterfaceInNamespacePid(name string, nsPid int) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkSetNsPid(iface, nsPid) | ||||
| } | ||||
| 
 | ||||
| func SetInterfaceMaster(name, master string) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	masterIface, err := net.InterfaceByName(master) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.AddToBridge(iface, masterIface) | ||||
| } | ||||
| 
 | ||||
| func SetDefaultGateway(ip, ifaceName string) error { | ||||
| 	return netlink.AddDefaultGw(ip, ifaceName) | ||||
| } | ||||
| 
 | ||||
| func SetInterfaceIp(name string, rawIp string) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	ip, ipNet, err := net.ParseCIDR(rawIp) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkLinkAddIp(iface, ip, ipNet) | ||||
| } | ||||
| 
 | ||||
| func SetMtu(name string, mtu int) error { | ||||
| 	iface, err := net.InterfaceByName(name) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return netlink.NetworkSetMTU(iface, mtu) | ||||
| } | ||||
|  | @ -1,35 +0,0 @@ | |||
| package network | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
// ErrNotValidStrategyType reports that a requested network strategy type has
// no registered implementation.
var ErrNotValidStrategyType = errors.New("not a valid network strategy type")
| 
 | ||||
| var strategies = map[string]NetworkStrategy{ | ||||
| 	"veth":     &Veth{}, | ||||
| 	"loopback": &Loopback{}, | ||||
| 	"netns":    &NetNS{}, | ||||
| } | ||||
| 
 | ||||
| // NetworkStrategy represents a specific network configuration for | ||||
| // a container's networking stack | ||||
| type NetworkStrategy interface { | ||||
| 	Create(*libcontainer.Network, int, libcontainer.Context) error | ||||
| 	Initialize(*libcontainer.Network, libcontainer.Context) error | ||||
| } | ||||
| 
 | ||||
| // GetStrategy returns the specific network strategy for the | ||||
| // provided type.  If no strategy is registered for the type an | ||||
| // ErrNotValidStrategyType is returned. | ||||
| func GetStrategy(tpe string) (NetworkStrategy, error) { | ||||
| 	s, exists := strategies[tpe] | ||||
| 	if !exists { | ||||
| 		return nil, ErrNotValidStrategyType | ||||
| 	} | ||||
| 	return s, nil | ||||
| } | ||||
|  | @ -1,96 +0,0 @@ | |||
| package network | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/utils" | ||||
| ) | ||||
| 
 | ||||
| // Veth is a network strategy that uses a bridge and creates | ||||
| // a veth pair, one that stays outside on the host and the other | ||||
| // is placed inside the container's namespace | ||||
| type Veth struct { | ||||
| } | ||||
| 
 | ||||
| const defaultDevice = "eth0" | ||||
| 
 | ||||
| func (v *Veth) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { | ||||
| 	var ( | ||||
| 		bridge string | ||||
| 		prefix string | ||||
| 		exists bool | ||||
| 	) | ||||
| 	if bridge, exists = n.Context["bridge"]; !exists { | ||||
| 		return fmt.Errorf("bridge does not exist in network context") | ||||
| 	} | ||||
| 	if prefix, exists = n.Context["prefix"]; !exists { | ||||
| 		return fmt.Errorf("veth prefix does not exist in network context") | ||||
| 	} | ||||
| 	name1, name2, err := createVethPair(prefix) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	context["veth-host"] = name1 | ||||
| 	context["veth-child"] = name2 | ||||
| 	if err := SetInterfaceMaster(name1, bridge); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := SetMtu(name1, n.Mtu); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := InterfaceUp(name1); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error { | ||||
| 	var ( | ||||
| 		vethChild string | ||||
| 		exists    bool | ||||
| 	) | ||||
| 	if vethChild, exists = context["veth-child"]; !exists { | ||||
| 		return fmt.Errorf("vethChild does not exist in network context") | ||||
| 	} | ||||
| 	if err := InterfaceDown(vethChild); err != nil { | ||||
| 		return fmt.Errorf("interface down %s %s", vethChild, err) | ||||
| 	} | ||||
| 	if err := ChangeInterfaceName(vethChild, defaultDevice); err != nil { | ||||
| 		return fmt.Errorf("change %s to %s %s", vethChild, defaultDevice, err) | ||||
| 	} | ||||
| 	if err := SetInterfaceIp(defaultDevice, config.Address); err != nil { | ||||
| 		return fmt.Errorf("set %s ip %s", defaultDevice, err) | ||||
| 	} | ||||
| 	if err := SetMtu(defaultDevice, config.Mtu); err != nil { | ||||
| 		return fmt.Errorf("set %s mtu to %d %s", defaultDevice, config.Mtu, err) | ||||
| 	} | ||||
| 	if err := InterfaceUp(defaultDevice); err != nil { | ||||
| 		return fmt.Errorf("%s up %s", defaultDevice, err) | ||||
| 	} | ||||
| 	if config.Gateway != "" { | ||||
| 		if err := SetDefaultGateway(config.Gateway, defaultDevice); err != nil { | ||||
| 			return fmt.Errorf("set gateway to %s on device %s failed with %s", config.Gateway, defaultDevice, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // createVethPair will automatically generage two random names for | ||||
| // the veth pair and ensure that they have been created | ||||
| func createVethPair(prefix string) (name1 string, name2 string, err error) { | ||||
| 	name1, err = utils.GenerateRandomName(prefix, 4) | ||||
| 	if err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	name2, err = utils.GenerateRandomName(prefix, 4) | ||||
| 	if err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	if err = CreateVethPair(name1, name2); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | @ -1,76 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"os/signal" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/namespaces" | ||||
| ) | ||||
| 
 | ||||
| var execCommand = cli.Command{ | ||||
| 	Name:   "exec", | ||||
| 	Usage:  "execute a new command inside a container", | ||||
| 	Action: execAction, | ||||
| } | ||||
| 
 | ||||
| func execAction(context *cli.Context) { | ||||
| 	var nspid, exitCode int | ||||
| 
 | ||||
| 	container, err := loadContainer() | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	if nspid, err = readPid(); err != nil && !os.IsNotExist(err) { | ||||
| 		log.Fatalf("unable to read pid: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if nspid > 0 { | ||||
| 		err = namespaces.ExecIn(container, nspid, []string(context.Args())) | ||||
| 	} else { | ||||
| 		term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty) | ||||
| 		exitCode, err = startContainer(container, term, dataPath, []string(context.Args())) | ||||
| 	} | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("failed to exec: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	os.Exit(exitCode) | ||||
| } | ||||
| 
 | ||||
| // startContainer starts the container. Returns the exit status or -1 and an | ||||
| // error. | ||||
| // | ||||
| // Signals sent to the current process will be forwarded to container. | ||||
| func startContainer(container *libcontainer.Container, term namespaces.Terminal, dataPath string, args []string) (int, error) { | ||||
| 	var ( | ||||
| 		cmd  *exec.Cmd | ||||
| 		sigc = make(chan os.Signal, 10) | ||||
| 	) | ||||
| 
 | ||||
| 	signal.Notify(sigc) | ||||
| 
 | ||||
| 	createCommand := func(container *libcontainer.Container, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { | ||||
| 		cmd = namespaces.DefaultCreateCommand(container, console, rootfs, dataPath, init, pipe, args) | ||||
| 		if logPath != "" { | ||||
| 			cmd.Env = append(cmd.Env, fmt.Sprintf("log=%s", logPath)) | ||||
| 		} | ||||
| 		return cmd | ||||
| 	} | ||||
| 
 | ||||
| 	startCallback := func() { | ||||
| 		go func() { | ||||
| 			for sig := range sigc { | ||||
| 				cmd.Process.Signal(sig) | ||||
| 			} | ||||
| 		}() | ||||
| 	} | ||||
| 
 | ||||
| 	return namespaces.Exec(container, term, "", dataPath, args, createCommand, startCallback) | ||||
| } | ||||
|  | @ -1,48 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/namespaces" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	dataPath  = os.Getenv("data_path") | ||||
| 	console   = os.Getenv("console") | ||||
| 	rawPipeFd = os.Getenv("pipe") | ||||
| 
 | ||||
| 	initCommand = cli.Command{ | ||||
| 		Name:   "init", | ||||
| 		Usage:  "runs the init process inside the namespace", | ||||
| 		Action: initAction, | ||||
| 	} | ||||
| ) | ||||
| 
 | ||||
| func initAction(context *cli.Context) { | ||||
| 	container, err := loadContainer() | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	rootfs, err := os.Getwd() | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	pipeFd, err := strconv.Atoi(rawPipeFd) | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	syncPipe, err := namespaces.NewSyncPipeFromFd(0, uintptr(pipeFd)) | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("unable to create sync pipe: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil { | ||||
| 		log.Fatalf("unable to initialize for container: %s", err) | ||||
| 	} | ||||
| } | ||||
|  | @ -1,40 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"log" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| ) | ||||
| 
 | ||||
| var logPath = os.Getenv("log") | ||||
| 
 | ||||
| func preload(context *cli.Context) error { | ||||
| 	if logPath != "" { | ||||
| 		if err := openLog(logPath); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| 	app := cli.NewApp() | ||||
| 	app.Name = "nsinit" | ||||
| 	app.Version = "0.1" | ||||
| 	app.Author = "libcontainer maintainers" | ||||
| 
 | ||||
| 	app.Before = preload | ||||
| 	app.Commands = []cli.Command{ | ||||
| 		execCommand, | ||||
| 		initCommand, | ||||
| 		statsCommand, | ||||
| 		specCommand, | ||||
| 		nsenterCommand, | ||||
| 	} | ||||
| 
 | ||||
| 	if err := app.Run(os.Args); err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| } | ||||
|  | @ -1,40 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"log" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/namespaces" | ||||
| ) | ||||
| 
 | ||||
| var nsenterCommand = cli.Command{ | ||||
| 	Name:   "nsenter", | ||||
| 	Usage:  "init process for entering an existing namespace", | ||||
| 	Action: nsenterAction, | ||||
| } | ||||
| 
 | ||||
| func nsenterAction(context *cli.Context) { | ||||
| 	args := context.Args() | ||||
| 	if len(args) < 4 { | ||||
| 		log.Fatalf("incorrect usage: <pid> <process label> <container JSON> <cmd>...") | ||||
| 	} | ||||
| 
 | ||||
| 	container, err := loadContainerFromJson(args.Get(2)) | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("unable to load container: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	nspid, err := strconv.Atoi(args.Get(0)) | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("unable to read pid: %s from %q", err, args.Get(0)) | ||||
| 	} | ||||
| 
 | ||||
| 	if nspid <= 0 { | ||||
| 		log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := namespaces.NsEnter(container, args.Get(1), nspid, args[3:]); err != nil { | ||||
| 		log.Fatalf("failed to nsenter: %s", err) | ||||
| 	} | ||||
| } | ||||
|  | @ -1,40 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"log" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
| var specCommand = cli.Command{ | ||||
| 	Name:   "spec", | ||||
| 	Usage:  "display the container specification", | ||||
| 	Action: specAction, | ||||
| } | ||||
| 
 | ||||
| func specAction(context *cli.Context) { | ||||
| 	container, err := loadContainer() | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	spec, err := getContainerSpec(container) | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("Failed to get spec - %v\n", err) | ||||
| 	} | ||||
| 
 | ||||
| 	fmt.Printf("Spec:\n%v\n", spec) | ||||
| } | ||||
| 
 | ||||
| // returns the container spec in json format. | ||||
| func getContainerSpec(container *libcontainer.Container) (string, error) { | ||||
| 	spec, err := json.MarshalIndent(container, "", "\t") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	return string(spec), nil | ||||
| } | ||||
|  | @ -1,46 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"log" | ||||
| 
 | ||||
| 	"github.com/codegangsta/cli" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups/fs" | ||||
| ) | ||||
| 
 | ||||
| var statsCommand = cli.Command{ | ||||
| 	Name:   "stats", | ||||
| 	Usage:  "display statistics for the container", | ||||
| 	Action: statsAction, | ||||
| } | ||||
| 
 | ||||
| func statsAction(context *cli.Context) { | ||||
| 	container, err := loadContainer() | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	stats, err := getContainerStats(container) | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("Failed to get stats - %v\n", err) | ||||
| 	} | ||||
| 
 | ||||
| 	fmt.Printf("Stats:\n%v\n", stats) | ||||
| } | ||||
| 
 | ||||
| // returns the container stats in json format. | ||||
| func getContainerStats(container *libcontainer.Container) (string, error) { | ||||
| 	stats, err := fs.GetStats(container.Cgroups) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	out, err := json.MarshalIndent(stats, "", "\t") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 
 | ||||
| 	return string(out), nil | ||||
| } | ||||
|  | @ -1,62 +0,0 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"io/ioutil" | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| ) | ||||
| 
 | ||||
| func loadContainer() (*libcontainer.Container, error) { | ||||
| 	f, err := os.Open(filepath.Join(dataPath, "container.json")) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
| 
 | ||||
| 	var container *libcontainer.Container | ||||
| 	if err := json.NewDecoder(f).Decode(&container); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return container, nil | ||||
| } | ||||
| 
 | ||||
| func readPid() (int, error) { | ||||
| 	data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid")) | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 
 | ||||
| 	pid, err := strconv.Atoi(string(data)) | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 
 | ||||
| 	return pid, nil | ||||
| } | ||||
| 
 | ||||
// openLog opens name for appending, creating it with mode 0755 when it does
// not exist, and makes it the output of the standard logger. The file is left
// open for the logger to use. An open failure is returned and the logger's
// output is left untouched.
func openLog(name string) error {
	const flags = os.O_CREATE | os.O_RDWR | os.O_APPEND

	logFile, err := os.OpenFile(name, flags, 0755)
	if err == nil {
		log.SetOutput(logFile)
	}
	return err
}
| 
 | ||||
| func loadContainerFromJson(rawData string) (*libcontainer.Container, error) { | ||||
| 	var container *libcontainer.Container | ||||
| 
 | ||||
| 	if err := json.Unmarshal([]byte(rawData), &container); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return container, nil | ||||
| } | ||||
|  | @ -1,57 +0,0 @@ | |||
| package capabilities | ||||
| 
 | ||||
| import ( | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer" | ||||
| 	"github.com/syndtr/gocapability/capability" | ||||
| ) | ||||
| 
 | ||||
| const allCapabilityTypes = capability.CAPS | capability.BOUNDS | ||||
| 
 | ||||
| // DropBoundingSet drops the capability bounding set to those specified in the | ||||
| // container configuration. | ||||
| func DropBoundingSet(container *libcontainer.Container) error { | ||||
| 	c, err := capability.NewPid(os.Getpid()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	keep := getEnabledCapabilities(container) | ||||
| 	c.Clear(capability.BOUNDS) | ||||
| 	c.Set(capability.BOUNDS, keep...) | ||||
| 
 | ||||
| 	if err := c.Apply(capability.BOUNDS); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // DropCapabilities drops all capabilities for the current process expect those specified in the container configuration. | ||||
| func DropCapabilities(container *libcontainer.Container) error { | ||||
| 	c, err := capability.NewPid(os.Getpid()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	keep := getEnabledCapabilities(container) | ||||
| 	c.Clear(allCapabilityTypes) | ||||
| 	c.Set(allCapabilityTypes, keep...) | ||||
| 
 | ||||
| 	if err := c.Apply(allCapabilityTypes); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // getEnabledCapabilities returns the capabilities that should not be dropped by the container. | ||||
| func getEnabledCapabilities(container *libcontainer.Container) []capability.Cap { | ||||
| 	keep := []capability.Cap{} | ||||
| 	for _, capability := range container.Capabilities { | ||||
| 		if c := libcontainer.GetCapability(capability); c != nil { | ||||
| 			keep = append(keep, c.Value) | ||||
| 		} | ||||
| 	} | ||||
| 	return keep | ||||
| } | ||||
|  | @ -1,52 +0,0 @@ | |||
| // +build linux | ||||
| 
 | ||||
| package restrict | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"syscall" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| ) | ||||
| 
 | ||||
| const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV | ||||
| 
 | ||||
| func mountReadonly(path string) error { | ||||
| 	for i := 0; i < 5; i++ { | ||||
| 		if err := system.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { | ||||
| 			switch err { | ||||
| 			case syscall.EINVAL: | ||||
| 				// Probably not a mountpoint, use bind-mount | ||||
| 				if err := system.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { | ||||
| 					return err | ||||
| 				} | ||||
| 				return system.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") | ||||
| 			case syscall.EBUSY: | ||||
| 				time.Sleep(100 * time.Millisecond) | ||||
| 				continue | ||||
| 			default: | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
| 	return fmt.Errorf("unable to mount %s as readonly max retries reached", path) | ||||
| } | ||||
| 
 | ||||
| // This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). | ||||
| // However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). | ||||
| func Restrict(mounts ...string) error { | ||||
| 	// remount proc and sys as readonly | ||||
| 	for _, dest := range mounts { | ||||
| 		if err := mountReadonly(dest); err != nil { | ||||
| 			return fmt.Errorf("unable to remount %s readonly: %s", dest, err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { | ||||
| 		return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,9 +0,0 @@ | |||
| // +build !linux | ||||
| 
 | ||||
| package restrict | ||||
| 
 | ||||
| import "fmt" | ||||
| 
 | ||||
// Restrict is the stub built on platforms other than Linux. It accepts the
// same variadic mounts argument as the Linux implementation so that callers
// compile on every platform, ignores it, and always returns a "not supported"
// error.
func Restrict(mounts ...string) error {
	return fmt.Errorf("not supported")
}
|  | @ -1,398 +0,0 @@ | |||
| package selinux | ||||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"crypto/rand" | ||||
| 	"encoding/binary" | ||||
| 	"fmt" | ||||
| 	"github.com/dotcloud/docker/pkg/mount" | ||||
| 	"github.com/dotcloud/docker/pkg/system" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"regexp" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	Enforcing        = 1 | ||||
| 	Permissive       = 0 | ||||
| 	Disabled         = -1 | ||||
| 	selinuxDir       = "/etc/selinux/" | ||||
| 	selinuxConfig    = selinuxDir + "config" | ||||
| 	selinuxTypeTag   = "SELINUXTYPE" | ||||
| 	selinuxTag       = "SELINUX" | ||||
| 	selinuxPath      = "/sys/fs/selinux" | ||||
| 	xattrNameSelinux = "security.selinux" | ||||
| 	stRdOnly         = 0x01 | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	assignRegex           = regexp.MustCompile(`^([^=]+)=(.*)$`) | ||||
| 	spaceRegex            = regexp.MustCompile(`^([^=]+) (.*)$`) | ||||
| 	mcsList               = make(map[string]bool) | ||||
| 	selinuxfs             = "unknown" | ||||
| 	selinuxEnabled        = false | ||||
| 	selinuxEnabledChecked = false | ||||
| ) | ||||
| 
 | ||||
| type SELinuxContext map[string]string | ||||
| 
 | ||||
| // SetDisabled disables selinux support for the package | ||||
| func SetDisabled() { | ||||
| 	selinuxEnabled, selinuxEnabledChecked = false, true | ||||
| } | ||||
| 
 | ||||
| func getSelinuxMountPoint() string { | ||||
| 	if selinuxfs != "unknown" { | ||||
| 		return selinuxfs | ||||
| 	} | ||||
| 	selinuxfs = "" | ||||
| 
 | ||||
| 	mounts, err := mount.GetMounts() | ||||
| 	if err != nil { | ||||
| 		return selinuxfs | ||||
| 	} | ||||
| 	for _, mount := range mounts { | ||||
| 		if mount.Fstype == "selinuxfs" { | ||||
| 			selinuxfs = mount.Mountpoint | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if selinuxfs != "" { | ||||
| 		var buf syscall.Statfs_t | ||||
| 		syscall.Statfs(selinuxfs, &buf) | ||||
| 		if (buf.Flags & stRdOnly) == 1 { | ||||
| 			selinuxfs = "" | ||||
| 		} | ||||
| 	} | ||||
| 	return selinuxfs | ||||
| } | ||||
| 
 | ||||
| func SelinuxEnabled() bool { | ||||
| 	if selinuxEnabledChecked { | ||||
| 		return selinuxEnabled | ||||
| 	} | ||||
| 	selinuxEnabledChecked = true | ||||
| 	if fs := getSelinuxMountPoint(); fs != "" { | ||||
| 		if con, _ := getcon(); con != "kernel" { | ||||
| 			selinuxEnabled = true | ||||
| 		} | ||||
| 	} | ||||
| 	return selinuxEnabled | ||||
| } | ||||
| 
 | ||||
| func readConfig(target string) (value string) { | ||||
| 	var ( | ||||
| 		val, key string | ||||
| 		bufin    *bufio.Reader | ||||
| 	) | ||||
| 
 | ||||
| 	in, err := os.Open(selinuxConfig) | ||||
| 	if err != nil { | ||||
| 		return "" | ||||
| 	} | ||||
| 	defer in.Close() | ||||
| 
 | ||||
| 	bufin = bufio.NewReader(in) | ||||
| 
 | ||||
| 	for done := false; !done; { | ||||
| 		var line string | ||||
| 		if line, err = bufin.ReadString('\n'); err != nil { | ||||
| 			if err != io.EOF { | ||||
| 				return "" | ||||
| 			} | ||||
| 			done = true | ||||
| 		} | ||||
| 		line = strings.TrimSpace(line) | ||||
| 		if len(line) == 0 { | ||||
| 			// Skip blank lines | ||||
| 			continue | ||||
| 		} | ||||
| 		if line[0] == ';' || line[0] == '#' { | ||||
| 			// Skip comments | ||||
| 			continue | ||||
| 		} | ||||
| 		if groups := assignRegex.FindStringSubmatch(line); groups != nil { | ||||
| 			key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) | ||||
| 			if key == target { | ||||
| 				return strings.Trim(val, "\"") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return "" | ||||
| } | ||||
| 
 | ||||
| func getSELinuxPolicyRoot() string { | ||||
| 	return selinuxDir + readConfig(selinuxTypeTag) | ||||
| } | ||||
| 
 | ||||
// readCon opens the file called name and scans a single whitespace-delimited
// token from it. It returns that token together with any error from opening
// or scanning the file.
func readCon(name string) (string, error) {
	file, err := os.Open(name)
	if err != nil {
		return "", err
	}
	defer file.Close()

	var token string
	_, err = fmt.Fscanf(file, "%s", &token)
	return token, err
}
| 
 | ||||
| func Setfilecon(path string, scon string) error { | ||||
| 	return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0) | ||||
| } | ||||
| 
 | ||||
| func Setfscreatecon(scon string) error { | ||||
| 	return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", system.Gettid()), scon) | ||||
| } | ||||
| 
 | ||||
| func Getfscreatecon() (string, error) { | ||||
| 	return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", system.Gettid())) | ||||
| } | ||||
| 
 | ||||
| func getcon() (string, error) { | ||||
| 	return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", system.Gettid())) | ||||
| } | ||||
| 
 | ||||
| func Getpidcon(pid int) (string, error) { | ||||
| 	return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) | ||||
| } | ||||
| 
 | ||||
| func Getexeccon() (string, error) { | ||||
| 	return readCon("/proc/self/attr/exec") | ||||
| } | ||||
| 
 | ||||
| func writeCon(name string, val string) error { | ||||
| 	if !SelinuxEnabled() { | ||||
| 		return nil | ||||
| 	} | ||||
| 	out, err := os.OpenFile(name, os.O_WRONLY, 0) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer out.Close() | ||||
| 
 | ||||
| 	if val != "" { | ||||
| 		_, err = out.Write([]byte(val)) | ||||
| 	} else { | ||||
| 		_, err = out.Write(nil) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
| 
 | ||||
| func Setexeccon(scon string) error { | ||||
| 	return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", system.Gettid()), scon) | ||||
| } | ||||
| 
 | ||||
| func (c SELinuxContext) Get() string { | ||||
| 	return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) | ||||
| } | ||||
| 
 | ||||
| func NewContext(scon string) SELinuxContext { | ||||
| 	c := make(SELinuxContext) | ||||
| 
 | ||||
| 	if len(scon) != 0 { | ||||
| 		con := strings.SplitN(scon, ":", 4) | ||||
| 		c["user"] = con[0] | ||||
| 		c["role"] = con[1] | ||||
| 		c["type"] = con[2] | ||||
| 		c["level"] = con[3] | ||||
| 	} | ||||
| 	return c | ||||
| } | ||||
| 
 | ||||
| func ReserveLabel(scon string) { | ||||
| 	if len(scon) != 0 { | ||||
| 		con := strings.SplitN(scon, ":", 4) | ||||
| 		mcsAdd(con[3]) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func SelinuxGetEnforce() int { | ||||
| 	var enforce int | ||||
| 
 | ||||
| 	enforceS, err := readCon(fmt.Sprintf("%s/enforce", selinuxPath)) | ||||
| 	if err != nil { | ||||
| 		return -1 | ||||
| 	} | ||||
| 
 | ||||
| 	enforce, err = strconv.Atoi(string(enforceS)) | ||||
| 	if err != nil { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	return enforce | ||||
| } | ||||
| 
 | ||||
| func SelinuxGetEnforceMode() int { | ||||
| 	switch readConfig(selinuxTag) { | ||||
| 	case "enforcing": | ||||
| 		return Enforcing | ||||
| 	case "permissive": | ||||
| 		return Permissive | ||||
| 	} | ||||
| 	return Disabled | ||||
| } | ||||
| 
 | ||||
| func mcsAdd(mcs string) error { | ||||
| 	if mcsList[mcs] { | ||||
| 		return fmt.Errorf("MCS Label already exists") | ||||
| 	} | ||||
| 	mcsList[mcs] = true | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func mcsDelete(mcs string) { | ||||
| 	mcsList[mcs] = false | ||||
| } | ||||
| 
 | ||||
| func mcsExists(mcs string) bool { | ||||
| 	return mcsList[mcs] | ||||
| } | ||||
| 
 | ||||
// IntToMcs maps id onto an MCS level of the form "s0:cA,cB".
//
// Ids are laid out in tiers: the first catRange ids share the first category
// c0, the next catRange-1 ids share c1, and so on, so catRange categories can
// encode catRange*(catRange+1)/2 ids. The empty string is returned when id is
// below 1, above 523776, or beyond what catRange can encode. The last check
// keeps the tier walk from running forever on a small catRange.
func IntToMcs(id int, catRange uint32) string {
	if id < 1 || id > 523776 {
		return ""
	}
	// Computed in uint64 so the product cannot overflow for any uint32 range.
	capacity := uint64(catRange) * (uint64(catRange) + 1) / 2
	if uint64(id) > capacity {
		return ""
	}

	setSize := int(catRange)
	tier := setSize
	ord := id
	// Peel off whole tiers until ord falls inside the current one.
	for ord > tier {
		ord -= tier
		tier--
	}
	tier = setSize - tier
	ord += tier
	return fmt.Sprintf("s0:c%d,c%d", tier, ord)
}
| 
 | ||||
| func uniqMcs(catRange uint32) string { | ||||
| 	var ( | ||||
| 		n      uint32 | ||||
| 		c1, c2 uint32 | ||||
| 		mcs    string | ||||
| 	) | ||||
| 
 | ||||
| 	for { | ||||
| 		binary.Read(rand.Reader, binary.LittleEndian, &n) | ||||
| 		c1 = n % catRange | ||||
| 		binary.Read(rand.Reader, binary.LittleEndian, &n) | ||||
| 		c2 = n % catRange | ||||
| 		if c1 == c2 { | ||||
| 			continue | ||||
| 		} else { | ||||
| 			if c1 > c2 { | ||||
| 				t := c1 | ||||
| 				c1 = c2 | ||||
| 				c2 = t | ||||
| 			} | ||||
| 		} | ||||
| 		mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) | ||||
| 		if err := mcsAdd(mcs); err != nil { | ||||
| 			continue | ||||
| 		} | ||||
| 		break | ||||
| 	} | ||||
| 	return mcs | ||||
| } | ||||
| 
 | ||||
| func FreeLxcContexts(scon string) { | ||||
| 	if len(scon) != 0 { | ||||
| 		con := strings.SplitN(scon, ":", 4) | ||||
| 		mcsDelete(con[3]) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func GetLxcContexts() (processLabel string, fileLabel string) { | ||||
| 	var ( | ||||
| 		val, key string | ||||
| 		bufin    *bufio.Reader | ||||
| 	) | ||||
| 
 | ||||
| 	if !SelinuxEnabled() { | ||||
| 		return "", "" | ||||
| 	} | ||||
| 	lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) | ||||
| 	in, err := os.Open(lxcPath) | ||||
| 	if err != nil { | ||||
| 		return "", "" | ||||
| 	} | ||||
| 	defer in.Close() | ||||
| 
 | ||||
| 	bufin = bufio.NewReader(in) | ||||
| 
 | ||||
| 	for done := false; !done; { | ||||
| 		var line string | ||||
| 		if line, err = bufin.ReadString('\n'); err != nil { | ||||
| 			if err == io.EOF { | ||||
| 				done = true | ||||
| 			} else { | ||||
| 				goto exit | ||||
| 			} | ||||
| 		} | ||||
| 		line = strings.TrimSpace(line) | ||||
| 		if len(line) == 0 { | ||||
| 			// Skip blank lines | ||||
| 			continue | ||||
| 		} | ||||
| 		if line[0] == ';' || line[0] == '#' { | ||||
| 			// Skip comments | ||||
| 			continue | ||||
| 		} | ||||
| 		if groups := assignRegex.FindStringSubmatch(line); groups != nil { | ||||
| 			key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) | ||||
| 			if key == "process" { | ||||
| 				processLabel = strings.Trim(val, "\"") | ||||
| 			} | ||||
| 			if key == "file" { | ||||
| 				fileLabel = strings.Trim(val, "\"") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if processLabel == "" || fileLabel == "" { | ||||
| 		return "", "" | ||||
| 	} | ||||
| 
 | ||||
| exit: | ||||
| 	//	mcs := IntToMcs(os.Getpid(), 1024) | ||||
| 	mcs := uniqMcs(1024) | ||||
| 	scon := NewContext(processLabel) | ||||
| 	scon["level"] = mcs | ||||
| 	processLabel = scon.Get() | ||||
| 	scon = NewContext(fileLabel) | ||||
| 	scon["level"] = mcs | ||||
| 	fileLabel = scon.Get() | ||||
| 	return processLabel, fileLabel | ||||
| } | ||||
| 
 | ||||
| func SecurityCheckContext(val string) error { | ||||
| 	return writeCon(fmt.Sprintf("%s.context", selinuxPath), val) | ||||
| } | ||||
| 
 | ||||
| func CopyLevel(src, dest string) (string, error) { | ||||
| 	if !SelinuxEnabled() { | ||||
| 		return "", nil | ||||
| 	} | ||||
| 	if src == "" { | ||||
| 		return "", nil | ||||
| 	} | ||||
| 	if err := SecurityCheckContext(src); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	if err := SecurityCheckContext(dest); err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	scon := NewContext(src) | ||||
| 	tcon := NewContext(dest) | ||||
| 	mcsDelete(tcon["level"]) | ||||
| 	mcsAdd(scon["level"]) | ||||
| 	tcon["level"] = scon["level"] | ||||
| 	return tcon.Get(), nil | ||||
| } | ||||
|  | @ -1,61 +0,0 @@ | |||
| package selinux_test | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/dotcloud/docker/pkg/libcontainer/selinux" | ||||
| 	"os" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
| func testSetfilecon(t *testing.T) { | ||||
| 	if selinux.SelinuxEnabled() { | ||||
| 		tmp := "selinux_test" | ||||
| 		out, _ := os.OpenFile(tmp, os.O_WRONLY, 0) | ||||
| 		out.Close() | ||||
| 		err := selinux.Setfilecon(tmp, "system_u:object_r:bin_t:s0") | ||||
| 		if err != nil { | ||||
| 			t.Log("Setfilecon failed") | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		os.Remove(tmp) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestSELinux(t *testing.T) { | ||||
| 	var ( | ||||
| 		err            error | ||||
| 		plabel, flabel string | ||||
| 	) | ||||
| 
 | ||||
| 	if selinux.SelinuxEnabled() { | ||||
| 		t.Log("Enabled") | ||||
| 		plabel, flabel = selinux.GetLxcContexts() | ||||
| 		t.Log(plabel) | ||||
| 		t.Log(flabel) | ||||
| 		selinux.FreeLxcContexts(plabel) | ||||
| 		plabel, flabel = selinux.GetLxcContexts() | ||||
| 		t.Log(plabel) | ||||
| 		t.Log(flabel) | ||||
| 		selinux.FreeLxcContexts(plabel) | ||||
| 		t.Log("getenforce ", selinux.SelinuxGetEnforce()) | ||||
| 		t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode()) | ||||
| 		pid := os.Getpid() | ||||
| 		t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) | ||||
| 		err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0") | ||||
| 		if err == nil { | ||||
| 			t.Log(selinux.Getfscreatecon()) | ||||
| 		} else { | ||||
| 			t.Log("setfscreatecon failed", err) | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		err = selinux.Setfscreatecon("") | ||||
| 		if err == nil { | ||||
| 			t.Log(selinux.Getfscreatecon()) | ||||
| 		} else { | ||||
| 			t.Log("setfscreatecon failed", err) | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		t.Log(selinux.Getpidcon(1)) | ||||
| 	} else { | ||||
| 		t.Log("Disabled") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,165 +0,0 @@ | |||
| package libcontainer | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 
 | ||||
| 	"github.com/syndtr/gocapability/capability" | ||||
| ) | ||||
| 
 | ||||
// ErrUnkownNamespace is the sentinel error for a namespace name that is not
// recognized. The misspelling in the identifier is part of the exported API.
var ErrUnkownNamespace = errors.New("Unknown namespace")

// ErrUnkownCapability is the sentinel error for a capability name that is not
// recognized. The misspelling in the identifier is part of the exported API.
var ErrUnkownCapability = errors.New("Unknown capability")

// ErrUnsupported is the sentinel error for a method that is not supported.
var ErrUnsupported = errors.New("Unsupported method")
| 
 | ||||
// Mounts is an ordered list of Mount entries.
type Mounts []Mount

// OfType returns the entries of s whose Type equals t, in their original
// order. The result is always a non-nil slice, even when nothing matches.
func (s Mounts) OfType(t string) Mounts {
	matched := make(Mounts, 0)
	for i := range s {
		if s[i].Type != t {
			continue
		}
		matched = append(matched, s[i])
	}
	return matched
}

// Mount describes a single mount entry. All fields are omitted from the JSON
// encoding when they hold their zero value.
type Mount struct {
	Type string `json:"type,omitempty"`
	// Source path, in the host namespace
	Source string `json:"source,omitempty"`
	// Destination path, in the container
	Destination string `json:"destination,omitempty"`
	Writable    bool   `json:"writable,omitempty"`
	Private     bool   `json:"private,omitempty"`
}
| 
 | ||||
| // namespaceList is used to convert the libcontainer types | ||||
| // into the names of the files located in /proc/<pid>/ns/* for | ||||
| // each namespace | ||||
| var ( | ||||
| 	namespaceList = Namespaces{} | ||||
| 
 | ||||
| 	capabilityList = Capabilities{ | ||||
| 		{Key: "SETPCAP", Value: capability.CAP_SETPCAP}, | ||||
| 		{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, | ||||
| 		{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, | ||||
| 		{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, | ||||
| 		{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, | ||||
| 		{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, | ||||
| 		{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, | ||||
| 		{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, | ||||
| 		{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, | ||||
| 		{Key: "MKNOD", Value: capability.CAP_MKNOD}, | ||||
| 		{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, | ||||
| 		{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL}, | ||||
| 		{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, | ||||
| 		{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, | ||||
| 		{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, | ||||
| 		{Key: "SYSLOG", Value: capability.CAP_SYSLOG}, | ||||
| 		{Key: "SETUID", Value: capability.CAP_SETUID}, | ||||
| 		{Key: "SETGID", Value: capability.CAP_SETGID}, | ||||
| 		{Key: "CHOWN", Value: capability.CAP_CHOWN}, | ||||
| 		{Key: "NET_RAW", Value: capability.CAP_NET_RAW}, | ||||
| 		{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE}, | ||||
| 		{Key: "FOWNER", Value: capability.CAP_FOWNER}, | ||||
| 		{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH}, | ||||
| 		{Key: "FSETID", Value: capability.CAP_FSETID}, | ||||
| 		{Key: "KILL", Value: capability.CAP_KILL}, | ||||
| 		{Key: "SETGID", Value: capability.CAP_SETGID}, | ||||
| 		{Key: "SETUID", Value: capability.CAP_SETUID}, | ||||
| 		{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE}, | ||||
| 		{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE}, | ||||
| 		{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST}, | ||||
| 		{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK}, | ||||
| 		{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER}, | ||||
| 		{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT}, | ||||
| 		{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE}, | ||||
| 		{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT}, | ||||
| 		{Key: "LEASE", Value: capability.CAP_LEASE}, | ||||
| 		{Key: "SETFCAP", Value: capability.CAP_SETFCAP}, | ||||
| 		{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM}, | ||||
| 		{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND}, | ||||
| 	} | ||||
| ) | ||||
| 
 | ||||
// Namespace describes one namespace: Key is its name, Value is an integer
// associated with it, and File is a file name associated with it. Zero-valued
// fields are omitted from the JSON encoding.
type Namespace struct {
	Key   string `json:"key,omitempty"`
	Value int    `json:"value,omitempty"`
	File  string `json:"file,omitempty"`
}

// Namespaces is a list of Namespace pointers.
type Namespaces []*Namespace

// String returns the namespace's Key.
func (ns *Namespace) String() string {
	name := ns.Key
	return name
}
| 
 | ||||
| func GetNamespace(key string) *Namespace { | ||||
| 	for _, ns := range namespaceList { | ||||
| 		if ns.Key == key { | ||||
| 			cpy := *ns | ||||
| 			return &cpy | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Contains returns true if the specified Namespace is | ||||
| // in the slice | ||||
| func (n Namespaces) Contains(ns string) bool { | ||||
| 	return n.Get(ns) != nil | ||||
| } | ||||
| 
 | ||||
| func (n Namespaces) Get(ns string) *Namespace { | ||||
| 	for _, nsp := range n { | ||||
| 		if nsp != nil && nsp.Key == ns { | ||||
| 			return nsp | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| type ( | ||||
| 	Capability struct { | ||||
| 		Key   string         `json:"key,omitempty"` | ||||
| 		Value capability.Cap `json:"value,omitempty"` | ||||
| 	} | ||||
| 	Capabilities []*Capability | ||||
| ) | ||||
| 
 | ||||
| func (c *Capability) String() string { | ||||
| 	return c.Key | ||||
| } | ||||
| 
 | ||||
| func GetCapability(key string) *Capability { | ||||
| 	for _, capp := range capabilityList { | ||||
| 		if capp.Key == key { | ||||
| 			cpy := *capp | ||||
| 			return &cpy | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func GetAllCapabilities() []string { | ||||
| 	output := make([]string, len(capabilityList)) | ||||
| 	for i, capability := range capabilityList { | ||||
| 		output[i] = capability.String() | ||||
| 	} | ||||
| 	return output | ||||
| } | ||||
| 
 | ||||
| // Contains returns true if the specified Capability is | ||||
| // in the slice | ||||
| func (c Capabilities) Contains(capp string) bool { | ||||
| 	return c.Get(capp) != nil | ||||
| } | ||||
| 
 | ||||
| func (c Capabilities) Get(capp string) *Capability { | ||||
| 	for _, cap := range c { | ||||
| 		if cap.Key == capp { | ||||
| 			return cap | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,16 +0,0 @@ | |||
| package libcontainer | ||||
| 
 | ||||
| import ( | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| func init() { | ||||
| 	namespaceList = Namespaces{ | ||||
| 		{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, | ||||
| 		{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, | ||||
| 		{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, | ||||
| 		{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, | ||||
| 		{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, | ||||
| 		{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, | ||||
| 	} | ||||
| } | ||||
|  | @ -1,44 +0,0 @@ | |||
| package libcontainer | ||||
| 
 | ||||
| import ( | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
| func TestNamespacesContains(t *testing.T) { | ||||
| 	ns := Namespaces{ | ||||
| 		GetNamespace("NEWPID"), | ||||
| 		GetNamespace("NEWNS"), | ||||
| 		GetNamespace("NEWUTS"), | ||||
| 	} | ||||
| 
 | ||||
| 	if ns.Contains("NEWNET") { | ||||
| 		t.Fatal("namespaces should not contain NEWNET") | ||||
| 	} | ||||
| 
 | ||||
| 	if !ns.Contains("NEWPID") { | ||||
| 		t.Fatal("namespaces should contain NEWPID but does not") | ||||
| 	} | ||||
| 
 | ||||
| 	withNil := Namespaces{ | ||||
| 		GetNamespace("UNDEFINED"), // this element will be nil | ||||
| 		GetNamespace("NEWPID"), | ||||
| 	} | ||||
| 
 | ||||
| 	if !withNil.Contains("NEWPID") { | ||||
| 		t.Fatal("namespaces should contain NEWPID but does not") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestCapabilitiesContains(t *testing.T) { | ||||
| 	caps := Capabilities{ | ||||
| 		GetCapability("MKNOD"), | ||||
| 		GetCapability("SETPCAP"), | ||||
| 	} | ||||
| 
 | ||||
| 	if caps.Contains("SYS_ADMIN") { | ||||
| 		t.Fatal("capabilities should not contain SYS_ADMIN") | ||||
| 	} | ||||
| 	if !caps.Contains("MKNOD") { | ||||
| 		t.Fatal("capabilities should contain MKNOD but does not") | ||||
| 	} | ||||
| } | ||||
|  | @ -1,28 +0,0 @@ | |||
| package utils | ||||
| 
 | ||||
import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"io"
	"path/filepath"
)
| 
 | ||||
// GenerateRandomName returns prefix followed by size random lowercase
// hexadecimal characters read from crypto/rand.
//
// A negative size is rejected with an error. For sizes up to 64 the function
// reads 32 random bytes and truncates their hex encoding; for larger sizes it
// reads as many bytes as are needed, so the request is honoured instead of
// panicking on an out-of-range slice. Any error from the random source is
// returned unchanged.
func GenerateRandomName(prefix string, size int) (string, error) {
	if size < 0 {
		return "", fmt.Errorf("invalid random name size %d", size)
	}

	// Two hex characters per byte; never read fewer than 32 bytes.
	n := 32
	if need := size/2 + size%2; need > n {
		n = need
	}

	id := make([]byte, n)
	if _, err := io.ReadFull(rand.Reader, id); err != nil {
		return "", err
	}
	return prefix + hex.EncodeToString(id)[:size], nil
}
| 
 | ||||
// ResolveRootfs converts uncleanRootfs to an absolute path and then resolves
// any symbolic links in it, returning the resulting path. An error from
// either step is returned; if making the path absolute fails the returned
// path is empty.
func ResolveRootfs(uncleanRootfs string) (string, error) {
	abs, err := filepath.Abs(uncleanRootfs)
	if err == nil {
		return filepath.EvalSymlinks(abs)
	}
	return "", err
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue