Merge pull request #5529 from crosbymichael/restrict-proc

Mount /proc and /sys read-only, except in privileged containers
This commit is contained in:
Guillaume J. Charmes 2014-05-02 10:52:53 -07:00
commit 473686ec89
7 changed files with 41 additions and 64 deletions

View file

@ -20,7 +20,7 @@ func IsEnabled() bool {
return false
}
func ApplyProfile(pid int, name string) error {
func ApplyProfile(name string) error {
if name == "" {
return nil
}

View file

@ -2,12 +2,10 @@
package apparmor
import ()
// IsEnabled reports whether AppArmor support is available.
// This variant of the package is a stub, so the answer is always false.
func IsEnabled() bool {
	const enabled = false
	return enabled
}
// ApplyProfile is the stub counterpart of the real profile switcher: it
// ignores its arguments and always returns nil.
// NOTE(review): two signature lines are shown because this listing appears to
// interleave the pre-change form (pid and name) with the post-change form
// (name only); presumably only the second one exists in the resulting file.
func ApplyProfile(pid int, name string) error {
func ApplyProfile(name string) error {
return nil
}

View file

@ -4,11 +4,12 @@ package console
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
)
// Setup initializes the proper /dev/console inside the rootfs path

View file

@ -11,7 +11,6 @@ import (
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/system"
)
@ -51,11 +50,6 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
return fmt.Errorf("copy dev nodes %s", err)
}
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
return fmt.Errorf("restrict %s", err)
}
}
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
return err
}
@ -124,22 +118,17 @@ func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
}
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
// standard bind mounts allowing them to be more dymanic
// standard bind mounts allowing them to be more dynamic
// newSystemMounts returns the list of system mounts to create beneath rootfs:
// proc, sysfs, a tmpfs for dev/shm and a devpts instance for dev/pts, plus a
// tmpfs on dev when mounts contains exactly one "devtmpfs" entry.
// mountLabel is passed through label.FormatMountLabel into the mount data of
// the tmpfs and devpts entries; proc and sysfs carry no data.
// NOTE(review): this listing appears to interleave removed and added diff
// lines, so shm, devpts and sysfs each show up twice (once in the literal and
// once in an append call) — confirm which set belongs to the final file.
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
// Mounts that are listed unconditionally in the literal.
systemMounts := []mount{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
}
// A tmpfs is placed on dev only when exactly one devtmpfs mount was requested.
if len(mounts.OfType("devtmpfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
}
// Append form of the shm and devpts entries (same values as in the literal above).
systemMounts = append(systemMounts,
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
)
// Conditional form of the sysfs entry: added only when exactly one sysfs mount was requested.
if len(mounts.OfType("sysfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
}
return systemMounts
}

View file

@ -16,6 +16,7 @@ import (
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user"
@ -68,18 +69,23 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
if err := system.Sethostname(container.Hostname); err != nil {
return fmt.Errorf("sethostname %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
runtime.LockOSThread()
if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil {
return err
if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
}
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
return fmt.Errorf("set process label %s", err)
}
if container.Context["restrictions"] != "" {
if err := restrict.Restrict(); err != nil {
return err
}
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
return system.Execv(args[0], args[0:], container.Env)
}

View file

@ -1,51 +1,25 @@
// +build linux
package restrict
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/system"
)
// flags are the mount flags used for every restriction mount: a recursive,
// read-only bind mount.
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
// restrictions maps each path to cover (joined onto the container rootfs by
// Restrict) to the source that is bind-mounted over it. An empty source is
// replaced by the caller-supplied empty directory; any other source is itself
// resolved beneath rootfs.
var restrictions = map[string]string{
// dirs
"/proc/sys": "",
"/proc/irq": "",
"/proc/acpi": "",
// files
"/proc/sysrq-trigger": "/dev/null",
"/proc/kcore": "/dev/null",
}
// Restrict locks down access to many areas of proc
// by using the assumption that the user does not have mount caps to
// revert the changes made here
func Restrict(rootfs, empty string) error {
for dest, source := range restrictions {
dest = filepath.Join(rootfs, dest)
// we don't have a "/dev/null" for dirs so have the requester pass a dir
// for us to bind mount
switch source {
case "":
source = empty
default:
source = filepath.Join(rootfs, source)
}
if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
if os.IsNotExist(err) {
continue
}
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
}
if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
// Restrict remounts proc and sys read-only and masks /proc/kcore by
// bind-mounting /dev/null over it.
//
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
//
// It returns an error describing the first mount that failed, or nil when
// every mount succeeded.
func Restrict() error {
	// remount proc and sys as readonly; the targets are relative paths, so
	// they resolve against the current working directory
	// (presumably the container root at this point — verify against caller).
	// NOTE(review): a remount replaces the mount flags, so flags given at the
	// original mount (e.g. nosuid/noexec) may need repeating here — confirm
	// against mount(2).
	for _, dest := range []string{"proc", "sys"} {
		if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
			return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
		}
	}
	// cover /proc/kcore with /dev/null so its contents cannot be read
	if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil {
		// report the underlying cause, matching the remount error above
		return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err)
	}
	return nil
}

View file

@ -0,0 +1,9 @@
// +build !linux
package restrict
import "fmt"
// Restrict is the fallback used on non-Linux builds. It performs no mounts
// and always returns a "not supported" error.
func Restrict() error {
	unsupported := fmt.Errorf("not supported")
	return unsupported
}