Merge pull request #5411 from crosbymichael/lockdown

Update default restrictions for exec drivers
This commit is contained in:
unclejack 2014-04-26 03:27:56 +03:00
commit 3a8c935a4d
17 changed files with 733 additions and 412 deletions

View file

@ -16,76 +16,149 @@ process are specified in this file. The configuration is used for each process
Sample `container.json` file: Sample `container.json` file:
```json ```json
{ {
"mounts" : [
{
"type" : "devtmpfs"
}
],
"tty" : true,
"environment" : [
"HOME=/",
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
"container=docker",
"TERM=xterm-256color"
],
"hostname" : "koye", "hostname" : "koye",
"cgroups" : {
"parent" : "docker",
"name" : "docker-koye"
},
"capabilities_mask" : [
{
"value" : 8,
"key" : "SETPCAP",
"enabled" : false
},
{
"enabled" : false,
"value" : 16,
"key" : "SYS_MODULE"
},
{
"value" : 17,
"key" : "SYS_RAWIO",
"enabled" : false
},
{
"key" : "SYS_PACCT",
"value" : 20,
"enabled" : false
},
{
"value" : 21,
"key" : "SYS_ADMIN",
"enabled" : false
},
{
"value" : 23,
"key" : "SYS_NICE",
"enabled" : false
},
{
"value" : 24,
"key" : "SYS_RESOURCE",
"enabled" : false
},
{
"key" : "SYS_TIME",
"value" : 25,
"enabled" : false
},
{
"enabled" : false,
"value" : 26,
"key" : "SYS_TTY_CONFIG"
},
{
"key" : "AUDIT_WRITE",
"value" : 29,
"enabled" : false
},
{
"value" : 30,
"key" : "AUDIT_CONTROL",
"enabled" : false
},
{
"enabled" : false,
"key" : "MAC_OVERRIDE",
"value" : 32
},
{
"enabled" : false,
"key" : "MAC_ADMIN",
"value" : 33
},
{
"key" : "NET_ADMIN",
"value" : 12,
"enabled" : false
},
{
"value" : 27,
"key" : "MKNOD",
"enabled" : true
}
],
"networks" : [ "networks" : [
{ {
"gateway" : "172.17.42.1", "mtu" : 1500,
"address" : "127.0.0.1/0",
"type" : "loopback",
"gateway" : "localhost"
},
{
"mtu" : 1500,
"address" : "172.17.42.2/16",
"type" : "veth",
"context" : { "context" : {
"bridge" : "docker0", "bridge" : "docker0",
"prefix" : "veth" "prefix" : "veth"
}, },
"address" : "172.17.0.2/16", "gateway" : "172.17.42.1"
"type" : "veth",
"mtu" : 1500
}
],
"cgroups" : {
"parent" : "docker",
"name" : "11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620"
},
"tty" : true,
"environment" : [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=11bb30683fb0",
"TERM=xterm"
],
"capabilities_mask" : [
"SETPCAP",
"SYS_MODULE",
"SYS_RAWIO",
"SYS_PACCT",
"SYS_ADMIN",
"SYS_NICE",
"SYS_RESOURCE",
"SYS_TIME",
"SYS_TTY_CONFIG",
"MKNOD",
"AUDIT_WRITE",
"AUDIT_CONTROL",
"MAC_OVERRIDE",
"MAC_ADMIN",
"NET_ADMIN"
],
"context" : {
"apparmor_profile" : "docker-default"
},
"mounts" : [
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/resolv.conf",
"writable" : false,
"destination" : "/etc/resolv.conf",
"private" : true
},
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hostname",
"writable" : false,
"destination" : "/etc/hostname",
"private" : true
},
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hosts",
"writable" : false,
"destination" : "/etc/hosts",
"private" : true
} }
], ],
"namespaces" : [ "namespaces" : [
"NEWNS", {
"NEWUTS", "key" : "NEWNS",
"NEWIPC", "value" : 131072,
"NEWPID", "enabled" : true,
"NEWNET" "file" : "mnt"
},
{
"key" : "NEWUTS",
"value" : 67108864,
"enabled" : true,
"file" : "uts"
},
{
"enabled" : true,
"file" : "ipc",
"key" : "NEWIPC",
"value" : 134217728
},
{
"file" : "pid",
"enabled" : true,
"value" : 536870912,
"key" : "NEWPID"
},
{
"enabled" : true,
"file" : "net",
"key" : "NEWNET",
"value" : 1073741824
}
] ]
} }
``` ```

View file

@ -0,0 +1,60 @@
// +build linux
package console
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// Setup initializes the proper /dev/console inside the rootfs path.
//
// It stats consolePath, removes any existing rootfs/dev/console, restricts
// consolePath to mode 0600 owned by uid 0 and gid 0, creates a node at
// rootfs/dev/console that keeps the non-permission mode bits and the device
// number of consolePath, applies mountLabel to consolePath and finally bind
// mounts consolePath over the new node. The umask is cleared for the duration
// of the call and restored on return.
func Setup(rootfs, consolePath, mountLabel string) error {
	oldMask := system.Umask(0000)
	defer system.Umask(oldMask)

	stat, err := os.Stat(consolePath)
	if err != nil {
		return fmt.Errorf("stat console %s %s", consolePath, err)
	}
	var (
		st   = stat.Sys().(*syscall.Stat_t)
		dest = filepath.Join(rootfs, "dev/console")
	)
	// a missing destination is fine, anything else prevents the mknod below
	if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("remove %s %s", dest, err)
	}
	if err := os.Chmod(consolePath, 0600); err != nil {
		return err
	}
	if err := os.Chown(consolePath, 0, 0); err != nil {
		return err
	}
	// keep everything above the permission bits of the source mode and force 0600
	if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
		return fmt.Errorf("mknod %s %s", dest, err)
	}
	// the label is applied to consolePath, so that is the path reported on failure
	if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
		return fmt.Errorf("set file label %s %s", consolePath, err)
	}
	if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
		return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
	}
	return nil
}
// OpenAndDup opens the terminal at consolePath for reading and writing and
// duplicates its file descriptor onto descriptors 0, 1 and 2, in that order.
// The first failing step aborts the sequence and its error is returned.
func OpenAndDup(consolePath string) error {
	tty, err := system.OpenTerminal(consolePath, syscall.O_RDWR)
	if err != nil {
		return fmt.Errorf("open terminal %s", err)
	}

	fd := tty.Fd()
	dupErr := system.Dup2(fd, 0)
	if dupErr == nil {
		dupErr = system.Dup2(fd, 1)
	}
	if dupErr == nil {
		dupErr = system.Dup2(fd, 2)
	}
	return dupErr
}

View file

@ -23,7 +23,7 @@ type Container struct {
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
Mounts []Mount `json:"mounts,omitempty"` Mounts Mounts `json:"mounts,omitempty"`
} }
// Network defines configuration for a container's networking stack // Network defines configuration for a container's networking stack
@ -37,12 +37,3 @@ type Network struct {
Gateway string `json:"gateway,omitempty"` Gateway string `json:"gateway,omitempty"`
Mtu int `json:"mtu,omitempty"` Mtu int `json:"mtu,omitempty"`
} }
// Mount describes a bind mount from the host system into the container.
type Mount struct {
Source string `json:"source"` // Source path, in the host namespace
Destination string `json:"destination"` // Destination path, in the container
Writable bool `json:"writable"` // when false the bind mount is made read-only
Private bool `json:"private"` // when true the destination is additionally mounted MS_PRIVATE
}

View file

@ -1,5 +1,9 @@
{ {
"hostname": "koye", "mounts" : [
{
"type" : "devtmpfs"
}
],
"tty" : true, "tty" : true,
"environment" : [ "environment" : [
"HOME=/", "HOME=/",
@ -7,44 +11,136 @@
"container=docker", "container=docker",
"TERM=xterm-256color" "TERM=xterm-256color"
], ],
"namespaces": [ "hostname" : "koye",
"NEWIPC", "cgroups" : {
"NEWNS", "parent" : "docker",
"NEWPID", "name" : "docker-koye"
"NEWUTS", },
"NEWNET"
],
"capabilities_mask" : [ "capabilities_mask" : [
"SETPCAP", {
"SYS_MODULE", "value" : 8,
"SYS_RAWIO", "key" : "SETPCAP",
"SYS_PACCT", "enabled" : false
"SYS_ADMIN", },
"SYS_NICE", {
"SYS_RESOURCE", "enabled" : false,
"SYS_TIME", "value" : 16,
"SYS_TTY_CONFIG", "key" : "SYS_MODULE"
"MKNOD", },
"AUDIT_WRITE", {
"AUDIT_CONTROL", "value" : 17,
"MAC_OVERRIDE", "key" : "SYS_RAWIO",
"MAC_ADMIN", "enabled" : false
"NET_ADMIN" },
{
"key" : "SYS_PACCT",
"value" : 20,
"enabled" : false
},
{
"value" : 21,
"key" : "SYS_ADMIN",
"enabled" : false
},
{
"value" : 23,
"key" : "SYS_NICE",
"enabled" : false
},
{
"value" : 24,
"key" : "SYS_RESOURCE",
"enabled" : false
},
{
"key" : "SYS_TIME",
"value" : 25,
"enabled" : false
},
{
"enabled" : false,
"value" : 26,
"key" : "SYS_TTY_CONFIG"
},
{
"key" : "AUDIT_WRITE",
"value" : 29,
"enabled" : false
},
{
"value" : 30,
"key" : "AUDIT_CONTROL",
"enabled" : false
},
{
"enabled" : false,
"key" : "MAC_OVERRIDE",
"value" : 32
},
{
"enabled" : false,
"key" : "MAC_ADMIN",
"value" : 33
},
{
"key" : "NET_ADMIN",
"value" : 12,
"enabled" : false
},
{
"value" : 27,
"key" : "MKNOD",
"enabled" : true
}
], ],
"networks": [{ "networks" : [
{
"mtu" : 1500,
"address" : "127.0.0.1/0",
"type" : "loopback",
"gateway" : "localhost"
},
{
"mtu" : 1500,
"address" : "172.17.42.2/16",
"type" : "veth", "type" : "veth",
"context" : { "context" : {
"bridge" : "docker0", "bridge" : "docker0",
"prefix": "dock" "prefix" : "veth"
}, },
"address": "172.17.0.100/16", "gateway" : "172.17.42.1"
"gateway": "172.17.42.1",
"mtu": 1500
} }
], ],
"cgroups": { "namespaces" : [
"name": "docker-koye", {
"parent": "docker", "key" : "NEWNS",
"memory": 5248000 "value" : 131072,
"enabled" : true,
"file" : "mnt"
},
{
"key" : "NEWUTS",
"value" : 67108864,
"enabled" : true,
"file" : "uts"
},
{
"enabled" : true,
"file" : "ipc",
"key" : "NEWIPC",
"value" : 134217728
},
{
"file" : "pid",
"enabled" : true,
"value" : 536870912,
"key" : "NEWPID"
},
{
"enabled" : true,
"file" : "net",
"key" : "NEWNET",
"value" : 1073741824
} }
]
} }

143
libcontainer/mount/init.go Normal file
View file

@ -0,0 +1,143 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// defaultMountFlags are the default mount point flags (MS_NOEXEC, MS_NOSUID
// and MS_NODEV); they are used for the proc, shm and sysfs mounts in this package
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// mount describes one system mount performed by mountSystem. The fields are
// handed to system.Mount as its source, target, filesystem type, flags and
// data arguments respectively.
type mount struct {
source string
path string
device string
flags int
data string
}
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
//
// The steps are, in order: change the propagation of "/" recursively (slave
// when container.NoPivotRoot is set, private otherwise), bind mount rootfs
// onto itself, mount the system filesystems, apply the container's bind
// mounts, copy the default device nodes, optionally apply the /proc
// restrictions when the "restriction_path" context key is set, set up
// /dev/ptmx and the console, switch the root to rootfs (MS_MOVE + chroot or
// pivot_root), optionally remount the root read-only and finally set the
// umask to 0022.
func InitializeMountNamespace(rootfs, console string, container *libcontainer.Container) error {
	var (
		err         error
		flag        = syscall.MS_PRIVATE
		propagation = "private"
	)
	if container.NoPivotRoot {
		flag = syscall.MS_SLAVE
		propagation = "slave"
	}
	// report the propagation mode that was actually requested
	if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
		return fmt.Errorf("mounting / as %s %s", propagation, err)
	}
	if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
		return fmt.Errorf("mounting %s as bind %s", rootfs, err)
	}
	if err := mountSystem(rootfs, container); err != nil {
		return fmt.Errorf("mount system %s", err)
	}
	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
		return fmt.Errorf("bind mounts %s", err)
	}
	if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
		return fmt.Errorf("copy dev nodes %s", err)
	}
	// restrictions are opt-in through the container context
	if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
		if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
			return fmt.Errorf("restrict %s", err)
		}
	}
	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
		return err
	}
	if err := system.Chdir(rootfs); err != nil {
		return fmt.Errorf("chdir into %s %s", rootfs, err)
	}
	if container.NoPivotRoot {
		err = MsMoveRoot(rootfs)
	} else {
		err = PivotRoot(rootfs)
	}
	if err != nil {
		return err
	}
	if container.ReadonlyFs {
		if err := SetReadonly(); err != nil {
			return fmt.Errorf("set readonly %s", err)
		}
	}
	system.Umask(0022)
	return nil
}
// mountSystem performs every mount returned by newSystemMounts for the given
// rootfs inside the mount namespace. The mount point directory of each entry
// is created with mode 0755 before the filesystem is mounted on it.
func mountSystem(rootfs string, container *libcontainer.Container) error {
	mountLabel := container.Context["mount_label"]
	for _, sm := range newSystemMounts(rootfs, mountLabel, container.Mounts) {
		mkdirErr := os.MkdirAll(sm.path, 0755)
		if mkdirErr != nil && !os.IsExist(mkdirErr) {
			return fmt.Errorf("mkdirall %s %s", sm.path, mkdirErr)
		}
		mountErr := system.Mount(sm.source, sm.path, sm.device, uintptr(sm.flags), sm.data)
		if mountErr != nil {
			return fmt.Errorf("mounting %s into %s %s", sm.source, sm.path, mountErr)
		}
	}
	return nil
}
// setupBindmounts applies every mount of type "bind" in bindMounts beneath
// rootfs. Each source is bind mounted recursively onto its destination;
// non-writable mounts also carry MS_RDONLY and are remounted with the same
// flags plus MS_REMOUNT, and private mounts are then marked MS_PRIVATE.
func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
	for _, bm := range bindMounts.OfType("bind") {
		target := filepath.Join(rootfs, bm.Destination)
		readonly := !bm.Writable

		mountFlags := syscall.MS_BIND | syscall.MS_REC
		if readonly {
			mountFlags |= syscall.MS_RDONLY
		}

		if err := system.Mount(bm.Source, target, "bind", uintptr(mountFlags), ""); err != nil {
			return fmt.Errorf("mounting %s into %s %s", bm.Source, target, err)
		}
		if readonly {
			if err := system.Mount(bm.Source, target, "bind", uintptr(mountFlags|syscall.MS_REMOUNT), ""); err != nil {
				return fmt.Errorf("remounting %s into %s %s", bm.Source, target, err)
			}
		}

		if !bm.Private {
			continue
		}
		if err := system.Mount("", target, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
			return fmt.Errorf("mounting %s private %s", target, err)
		}
	}
	return nil
}
// newSystemMounts builds the ordered list of system mounts for rootfs: proc,
// an optional tmpfs on dev (only when mounts holds exactly one "devtmpfs"
// entry), shm, devpts and an optional sysfs (only when mounts holds exactly
// one "sysfs" entry). mountLabel is folded into the shm and devpts mount data.
//
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
// standard bind mounts allowing them to be more dynamic
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
	list := []mount{
		{
			source: "proc",
			path:   filepath.Join(rootfs, "proc"),
			device: "proc",
			flags:  defaultMountFlags,
		},
	}

	if len(mounts.OfType("devtmpfs")) == 1 {
		list = append(list, mount{
			source: "tmpfs",
			path:   filepath.Join(rootfs, "dev"),
			device: "tmpfs",
			flags:  syscall.MS_NOSUID | syscall.MS_STRICTATIME,
			data:   "mode=755",
		})
	}

	list = append(list, mount{
		source: "shm",
		path:   filepath.Join(rootfs, "dev", "shm"),
		device: "tmpfs",
		flags:  defaultMountFlags,
		data:   label.FormatMountLabel("mode=1777,size=65536k", mountLabel),
	})
	list = append(list, mount{
		source: "devpts",
		path:   filepath.Join(rootfs, "dev", "pts"),
		device: "devpts",
		flags:  syscall.MS_NOSUID | syscall.MS_NOEXEC,
		data:   label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel),
	})

	if len(mounts.OfType("sysfs")) == 1 {
		list = append(list, mount{
			source: "sysfs",
			path:   filepath.Join(rootfs, "sys"),
			device: "sysfs",
			flags:  defaultMountFlags,
		})
	}
	return list
}

View file

@ -0,0 +1,19 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
// MsMoveRoot moves the rootfs mount onto "/" using MS_MOVE, chroots into the
// current working directory and then changes directory to "/".
func MsMoveRoot(rootfs string) error {
	moveErr := system.Mount(rootfs, "/", "", syscall.MS_MOVE, "")
	if moveErr != nil {
		return fmt.Errorf("mount move %s into / %s", rootfs, moveErr)
	}

	chrootErr := system.Chroot(".")
	if chrootErr != nil {
		return fmt.Errorf("chroot . %s", chrootErr)
	}

	return system.Chdir("/")
}

View file

@ -0,0 +1,49 @@
// +build linux
package nodes
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// DefaultNodes is the default list of device nodes to copy. Each entry is a
// name relative to /dev.
var DefaultNodes = []string{
"null",
"zero",
"full",
"random",
"urandom",
"tty",
}
// CopyN copies each device node named in nodesToCopy from the host into the
// rootfs by calling Copy, stopping at the first error. The umask is cleared
// while the nodes are created and restored on return.
func CopyN(rootfs string, nodesToCopy []string) error {
	previous := system.Umask(0000)
	defer system.Umask(previous)

	for i := range nodesToCopy {
		if err := Copy(rootfs, nodesToCopy[i]); err != nil {
			return err
		}
	}
	return nil
}
// Copy creates rootfs/dev/<node> with the same mode and device number as the
// host's /dev/<node>. A destination that already exists is not treated as an
// error.
func Copy(rootfs, node string) error {
	hostInfo, err := os.Stat(filepath.Join("/dev", node))
	if err != nil {
		return err
	}

	target := filepath.Join(rootfs, "dev", node)
	hostStat := hostInfo.Sys().(*syscall.Stat_t)

	mknodErr := system.Mknod(target, hostStat.Mode, int(hostStat.Rdev))
	if mknodErr == nil || os.IsExist(mknodErr) {
		return nil
	}
	return fmt.Errorf("copy %s %s", node, mknodErr)
}

View file

@ -0,0 +1,31 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
// PivotRoot makes rootfs the new root filesystem using pivot_root.
//
// A temporary directory is created inside rootfs to receive the old root.
// After the pivot the working directory is changed to "/", the old root is
// lazily unmounted (MNT_DETACH) and the temporary directory is removed.
func PivotRoot(rootfs string) error {
	pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
	if err != nil {
		// pivotDir is empty when TempDir fails, so report the parent directory
		return fmt.Errorf("can't create pivot_root dir in %s %s", rootfs, err)
	}
	if err := system.Pivotroot(rootfs, pivotDir); err != nil {
		return fmt.Errorf("pivot_root %s", err)
	}
	if err := system.Chdir("/"); err != nil {
		return fmt.Errorf("chdir / %s", err)
	}
	// path to pivot dir now changed, update
	pivotDir = filepath.Join("/", filepath.Base(pivotDir))
	if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
		return fmt.Errorf("unmount pivot_root dir %s", err)
	}
	return os.Remove(pivotDir)
}

View file

@ -0,0 +1,26 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer/console"
"os"
"path/filepath"
)
// SetupPtmx replaces rootfs/dev/ptmx with a symlink to pts/ptmx and, when
// consolePath is not empty, sets up the console through console.Setup.
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
	ptmxPath := filepath.Join(rootfs, "dev/ptmx")

	if err := os.Remove(ptmxPath); err != nil {
		// a ptmx that is not there yet is fine
		if !os.IsNotExist(err) {
			return err
		}
	}
	if err := os.Symlink("pts/ptmx", ptmxPath); err != nil {
		return fmt.Errorf("symlink dev ptmx %s", err)
	}

	if consolePath == "" {
		return nil
	}
	return console.Setup(rootfs, consolePath, mountLabel)
}

View file

@ -0,0 +1,12 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
// SetReadonly remounts "/" onto itself as a recursive, read-only bind mount.
func SetReadonly() error {
	const readonlyFlags = syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC
	return system.Mount("/", "/", "bind", readonlyFlags, "")
}

View file

@ -0,0 +1,31 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
// RemountProc lazily unmounts /proc (MNT_DETACH) and mounts a fresh proc
// filesystem in its place using the default mount flags.
func RemountProc() error {
	if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
		return err
	}
	return system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), "")
}
// RemountSys lazily unmounts /sys (MNT_DETACH) and mounts a fresh sysfs in
// its place using the default mount flags. When the unmount fails with
// EINVAL the error is ignored and nothing is mounted (presumably /sys is not
// a mount point in that case); any other unmount error is returned.
func RemountSys() error {
	if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
		if err == syscall.EINVAL {
			return nil
		}
		return err
	}
	return system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), "")
}

View file

@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
"os" "os"
"path/filepath" "path/filepath"
@ -63,10 +64,10 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil { if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
return -1, err return -1, err
} }
if err := remountProc(); err != nil { if err := mount.RemountProc(); err != nil {
return -1, fmt.Errorf("remount proc %s", err) return -1, fmt.Errorf("remount proc %s", err)
} }
if err := remountSys(); err != nil { if err := mount.RemountSys(); err != nil {
return -1, fmt.Errorf("remount sys %s", err) return -1, fmt.Errorf("remount sys %s", err)
} }
goto dropAndExec goto dropAndExec

View file

@ -11,8 +11,10 @@ import (
"github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/apparmor"
"github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/console"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user" "github.com/dotcloud/docker/pkg/user"
@ -20,7 +22,7 @@ import (
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container. // and other options required for the new container.
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
rootfs, err := utils.ResolveRootfs(uncleanRootfs) rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil { if err != nil {
return err return err
@ -36,20 +38,16 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
ns.logger.Println("received context from parent") ns.logger.Println("received context from parent")
syncPipe.Close() syncPipe.Close()
if console != "" { if consolePath != "" {
ns.logger.Printf("setting up %s as console\n", console) ns.logger.Printf("setting up %s as console\n", consolePath)
slave, err := system.OpenTerminal(console, syscall.O_RDWR) if err := console.OpenAndDup(consolePath); err != nil {
if err != nil { return err
return fmt.Errorf("open terminal %s", err)
}
if err := dupSlave(slave); err != nil {
return fmt.Errorf("dup2 slave %s", err)
} }
} }
if _, err := system.Setsid(); err != nil { if _, err := system.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err) return fmt.Errorf("setsid %s", err)
} }
if console != "" { if consolePath != "" {
if err := system.Setctty(); err != nil { if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err) return fmt.Errorf("setctty %s", err)
} }
@ -60,7 +58,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
label.Init() label.Init()
ns.logger.Println("setup mount namespace") ns.logger.Println("setup mount namespace")
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil { if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
return fmt.Errorf("setup mount namespace %s", err) return fmt.Errorf("setup mount namespace %s", err)
} }
if err := system.Sethostname(container.Hostname); err != nil { if err := system.Sethostname(container.Hostname); err != nil {
@ -114,21 +112,6 @@ func setupUser(container *libcontainer.Container) error {
return nil return nil
} }
// dupSlave duplicates the pty slave's file descriptor onto descriptors 0, 1
// and 2, in that order, returning the first error encountered.
func dupSlave(slave *os.File) error {
	fd := slave.Fd()

	err := system.Dup2(fd, 0)
	if err == nil {
		err = system.Dup2(fd, 1)
	}
	if err == nil {
		err = system.Dup2(fd, 2)
	}
	return err
}
// setupVethNetwork uses the Network config if it is not nil to initialize // setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0 // the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway // setting the MTU and IP address along with the default gateway

View file

@ -1,265 +0,0 @@
// +build linux
package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
// defaultMountFlags are the default mount point flags (MS_NOEXEC, MS_NOSUID
// and MS_NODEV); they are used for the proc, sysfs and shm mounts in this file
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupNewMountNamespace is used to initialize a new mount namespace for a new
// container in the rootfs that is specified.
//
// The propagation of "/" is changed recursively (slave when noPivotRoot is
// set, private otherwise), rootfs is bind mounted onto itself, the system
// filesystems and the requested bind mounts are mounted, device nodes, ptmx
// and the console are set up, and the root is switched to rootfs. When
// readonly is set the new root is remounted read-only. The umask is set to
// 0022 before returning.
//
// There is no need to unmount the new mounts because as soon as the mount namespace
// is no longer in use, the mounts will be removed automatically
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
	var (
		flag        = syscall.MS_PRIVATE
		propagation = "private"
	)
	if noPivotRoot {
		flag = syscall.MS_SLAVE
		propagation = "slave"
	}
	// report the propagation mode that was actually requested
	if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
		return fmt.Errorf("mounting / as %s %s", propagation, err)
	}
	if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
		return fmt.Errorf("mounting %s as bind %s", rootfs, err)
	}
	if err := mountSystem(rootfs, mountLabel); err != nil {
		return fmt.Errorf("mount system %s", err)
	}

	for _, m := range bindMounts {
		var (
			flags = syscall.MS_BIND | syscall.MS_REC
			dest  = filepath.Join(rootfs, m.Destination)
		)
		if !m.Writable {
			flags = flags | syscall.MS_RDONLY
		}
		if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
			return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
		}
		// non-writable mounts are remounted with the same flags plus MS_REMOUNT
		if !m.Writable {
			if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
				return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
			}
		}
		if m.Private {
			if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
				return fmt.Errorf("mounting %s private %s", dest, err)
			}
		}
	}

	if err := copyDevNodes(rootfs); err != nil {
		return fmt.Errorf("copy dev nodes %s", err)
	}
	if err := setupPtmx(rootfs, console, mountLabel); err != nil {
		return err
	}
	if err := system.Chdir(rootfs); err != nil {
		return fmt.Errorf("chdir into %s %s", rootfs, err)
	}

	if noPivotRoot {
		if err := rootMsMove(rootfs); err != nil {
			return err
		}
	} else {
		if err := rootPivot(rootfs); err != nil {
			return err
		}
	}

	if readonly {
		if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
			return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
		}
	}

	system.Umask(0022)

	return nil
}
// rootPivot uses a pivot root to setup the rootfs.
//
// A temporary directory is created inside rootfs to receive the old root.
// After the pivot the working directory is changed to "/", the old root is
// lazily unmounted (MNT_DETACH) and the temporary directory is removed.
func rootPivot(rootfs string) error {
	pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
	if err != nil {
		// pivotDir is empty when TempDir fails, so report the parent directory
		return fmt.Errorf("can't create pivot_root dir in %s %s", rootfs, err)
	}
	if err := system.Pivotroot(rootfs, pivotDir); err != nil {
		return fmt.Errorf("pivot_root %s", err)
	}
	if err := system.Chdir("/"); err != nil {
		return fmt.Errorf("chdir / %s", err)
	}
	// path to pivot dir now changed, update
	pivotDir = filepath.Join("/", filepath.Base(pivotDir))
	if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
		return fmt.Errorf("unmount pivot_root dir %s", err)
	}
	if err := os.Remove(pivotDir); err != nil {
		return fmt.Errorf("remove pivot_root dir %s", err)
	}
	return nil
}
// rootMsMove sets up the rootfs by moving its mount onto "/" with MS_MOVE,
// chrooting into the current working directory and changing directory to "/".
func rootMsMove(rootfs string) error {
	moveErr := system.Mount(rootfs, "/", "", syscall.MS_MOVE, "")
	if moveErr != nil {
		return fmt.Errorf("mount move %s into / %s", rootfs, moveErr)
	}

	chrootErr := system.Chroot(".")
	if chrootErr != nil {
		return fmt.Errorf("chroot . %s", chrootErr)
	}

	chdirErr := system.Chdir("/")
	if chdirErr != nil {
		return fmt.Errorf("chdir / %s", chdirErr)
	}
	return nil
}
// copyDevNodes recreates a fixed set of the host's device nodes inside the
// rootfs so the new container has access to them. The umask is cleared while
// the nodes are created and restored on return.
func copyDevNodes(rootfs string) error {
	previous := system.Umask(0000)
	defer system.Umask(previous)

	devices := []string{"null", "zero", "full", "random", "urandom", "tty"}
	for i := range devices {
		if err := copyDevNode(rootfs, devices[i]); err != nil {
			return err
		}
	}
	return nil
}
// copyDevNode creates rootfs/dev/<node> with the same mode and device number
// as the host's /dev/<node>. A destination that already exists is not treated
// as an error.
func copyDevNode(rootfs, node string) error {
	hostInfo, err := os.Stat(filepath.Join("/dev", node))
	if err != nil {
		return err
	}

	target := filepath.Join(rootfs, "dev", node)
	hostStat := hostInfo.Sys().(*syscall.Stat_t)

	mknodErr := system.Mknod(target, hostStat.Mode, int(hostStat.Rdev))
	if mknodErr == nil || os.IsExist(mknodErr) {
		return nil
	}
	return fmt.Errorf("copy %s %s", node, mknodErr)
}
// setupConsole ensures that the container has a proper /dev/console setup.
//
// It stats console, removes any existing rootfs/dev/console, restricts
// console to mode 0600 owned by uid 0 and gid 0, creates a node at
// rootfs/dev/console that keeps the non-permission mode bits and the device
// number of console, applies mountLabel to console and finally bind mounts
// console over the new node. The umask is cleared for the duration of the
// call and restored on return.
func setupConsole(rootfs, console string, mountLabel string) error {
	oldMask := system.Umask(0000)
	defer system.Umask(oldMask)

	stat, err := os.Stat(console)
	if err != nil {
		return fmt.Errorf("stat console %s %s", console, err)
	}
	var (
		st   = stat.Sys().(*syscall.Stat_t)
		dest = filepath.Join(rootfs, "dev/console")
	)
	// a missing destination is fine, anything else prevents the mknod below
	if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("remove %s %s", dest, err)
	}
	if err := os.Chmod(console, 0600); err != nil {
		return err
	}
	if err := os.Chown(console, 0, 0); err != nil {
		return err
	}
	// keep everything above the permission bits of the source mode and force 0600
	if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
		return fmt.Errorf("mknod %s %s", dest, err)
	}
	// the label is applied to console, so that is the path reported on failure
	if err := label.SetFileLabel(console, mountLabel); err != nil {
		return fmt.Errorf("SetFileLabel Failed %s %s", console, err)
	}
	if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
		return fmt.Errorf("bind %s to %s %s", console, dest, err)
	}
	return nil
}
// mountSystem mounts proc, sysfs, shm and devpts beneath rootfs inside the
// mount namespace. Each mount point directory is created with mode 0755
// before the filesystem is mounted; mountLabel is folded into the shm and
// devpts mount data.
func mountSystem(rootfs string, mountLabel string) error {
	type systemMount struct {
		source string
		path   string
		device string
		flags  int
		data   string
	}

	table := []systemMount{
		{
			source: "proc",
			path:   filepath.Join(rootfs, "proc"),
			device: "proc",
			flags:  defaultMountFlags,
		},
		{
			source: "sysfs",
			path:   filepath.Join(rootfs, "sys"),
			device: "sysfs",
			flags:  defaultMountFlags,
		},
		{
			source: "shm",
			path:   filepath.Join(rootfs, "dev", "shm"),
			device: "tmpfs",
			flags:  defaultMountFlags,
			data:   label.FormatMountLabel("mode=1777,size=65536k", mountLabel),
		},
		{
			source: "devpts",
			path:   filepath.Join(rootfs, "dev", "pts"),
			device: "devpts",
			flags:  syscall.MS_NOSUID | syscall.MS_NOEXEC,
			data:   label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel),
		},
	}

	for _, sm := range table {
		mkdirErr := os.MkdirAll(sm.path, 0755)
		if mkdirErr != nil && !os.IsExist(mkdirErr) {
			return fmt.Errorf("mkdirall %s %s", sm.path, mkdirErr)
		}
		mountErr := system.Mount(sm.source, sm.path, sm.device, uintptr(sm.flags), sm.data)
		if mountErr != nil {
			return fmt.Errorf("mounting %s into %s %s", sm.source, sm.path, mountErr)
		}
	}
	return nil
}
// setupPtmx replaces rootfs/dev/ptmx with a symlink to pts/ptmx and, when
// console is not empty, finishes by setting up /dev/console.
func setupPtmx(rootfs, console string, mountLabel string) error {
	ptmxPath := filepath.Join(rootfs, "dev/ptmx")

	if err := os.Remove(ptmxPath); err != nil {
		// a ptmx that is not there yet is fine
		if !os.IsNotExist(err) {
			return err
		}
	}
	if err := os.Symlink("pts/ptmx", ptmxPath); err != nil {
		return fmt.Errorf("symlink dev ptmx %s", err)
	}

	if console == "" {
		return nil
	}
	return setupConsole(rootfs, console, mountLabel)
}
// remountProc detaches /proc (MNT_DETACH) and mounts a fresh proc filesystem
// in its place using the default mount flags; this is commonly needed when
// running a new process inside an existing container.
func remountProc() error {
	if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
		return err
	}
	return system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), "")
}
// remountSys detaches /sys (MNT_DETACH) and mounts a fresh sysfs in its place
// using the default mount flags. When the unmount fails with EINVAL the error
// is ignored and nothing is mounted (presumably /sys is not a mount point in
// that case); any other unmount error is returned.
func remountSys() error {
	if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
		if err == syscall.EINVAL {
			return nil
		}
		return err
	}
	return system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), "")
}

View file

@ -0,0 +1,51 @@
package restrict
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/system"
)
// flags are the mount flags used by Restrict: a recursive, read-only bind mount.
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
// restrictions maps each path to restrict (relative to the rootfs) to the
// path, also relative to the rootfs, that is bind mounted over it. An empty
// value means the caller-supplied empty directory is used as the source.
var restrictions = map[string]string{
// dirs
"/proc/sys": "",
"/proc/irq": "",
"/proc/acpi": "",
// files
"/proc/sysrq-trigger": "/dev/null",
"/proc/kcore": "/dev/null",
}
// Restrict locks down access to many areas of proc
// by using the assumption that the user does not have mount caps to
// revert the changes made here.
//
// Every entry of restrictions is covered, beneath rootfs, by a read-only bind
// mount which is then remounted with the same flags plus MS_REMOUNT. Entries
// whose first mount fails with a "does not exist" error are skipped.
func Restrict(rootfs, empty string) error {
	for path, replacement := range restrictions {
		target := filepath.Join(rootfs, path)

		// we don't have a "/dev/null" for dirs so have the requester pass a dir
		// for us to bind mount
		src := empty
		if replacement != "" {
			src = filepath.Join(rootfs, replacement)
		}

		if err := system.Mount(src, target, "bind", flags, ""); err != nil {
			if !os.IsNotExist(err) {
				return fmt.Errorf("unable to mount %s over %s %s", src, target, err)
			}
			continue
		}
		if err := system.Mount("", target, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
			return fmt.Errorf("unable to mount %s over %s %s", src, target, err)
		}
	}
	return nil
}

View file

@ -11,6 +11,26 @@ var (
ErrUnsupported = errors.New("Unsupported method") ErrUnsupported = errors.New("Unsupported method")
) )
// Mounts is a list of Mount entries that can be filtered by type.
type Mounts []Mount

// OfType returns the entries of s whose Type equals t, in their original
// order. The result is always a non-nil Mounts value, empty when nothing
// matches.
func (s Mounts) OfType(t string) Mounts {
	matches := Mounts{}
	for i := range s {
		if s[i].Type != t {
			continue
		}
		matches = append(matches, s[i])
	}
	return matches
}

// Mount describes a single mount of a container, selected by its Type.
type Mount struct {
	Type        string `json:"type,omitempty"`
	Source      string `json:"source,omitempty"`      // Source path, in the host namespace
	Destination string `json:"destination,omitempty"` // Destination path, in the container
	Writable    bool   `json:"writable,omitempty"`
	Private     bool   `json:"private,omitempty"`
}
// namespaceList is used to convert the libcontainer types // namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for // into the names of the files located in /proc/<pid>/ns/* for
// each namespace // each namespace