Move all bind-mounts in the container inside the namespace
This moves the bind mounts like /.dockerinit, /etc/hostname, volumes, etc into the container namespace, by setting them up using lxc. This is useful to avoid littering the global namespace with a lot of mounts that are internal to each container and are not generally needed on the outside. In particular, it seems that having a lot of mounts is problematic wrt scaling to a lot of containers on systems where the root filesystem is mounted --rshared. Note that the "private" option is only supported by the native driver, as lxc doesn't support setting this. This is not a huge problem, but it does mean that some mounts are unnecessarily shared inside the container if you're using the lxc driver. Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
parent
f7fe084946
commit
eebaba3215
3 changed files with 30 additions and 2 deletions
|
@ -23,6 +23,7 @@ type Container struct {
|
||||||
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
|
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
|
||||||
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
|
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
|
||||||
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
|
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
|
||||||
|
Mounts []Mount `json:"mounts,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Network defines configuration for a container's networking stack
|
// Network defines configuration for a container's networking stack
|
||||||
|
@ -36,3 +37,12 @@ type Network struct {
|
||||||
Gateway string `json:"gateway,omitempty"`
|
Gateway string `json:"gateway,omitempty"`
|
||||||
Mtu int `json:"mtu,omitempty"`
|
Mtu int `json:"mtu,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bind mounts from the host system to the container
|
||||||
|
//
|
||||||
|
type Mount struct {
|
||||||
|
Source string `json:"source"` // Source path, in the host namespace
|
||||||
|
Destination string `json:"destination"` // Destination path, in the container
|
||||||
|
Writable bool `json:"writable"`
|
||||||
|
Private bool `json:"private"`
|
||||||
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
|
||||||
if err := system.ParentDeathSignal(); err != nil {
|
if err := system.ParentDeathSignal(); err != nil {
|
||||||
return fmt.Errorf("parent death signal %s", err)
|
return fmt.Errorf("parent death signal %s", err)
|
||||||
}
|
}
|
||||||
if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
||||||
return fmt.Errorf("setup mount namespace %s", err)
|
return fmt.Errorf("setup mount namespace %s", err)
|
||||||
}
|
}
|
||||||
if err := setupNetwork(container, context); err != nil {
|
if err := setupNetwork(container, context); err != nil {
|
||||||
|
|
|
@ -4,6 +4,7 @@ package nsinit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
||||||
"github.com/dotcloud/docker/pkg/system"
|
"github.com/dotcloud/docker/pkg/system"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
@ -19,7 +20,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD
|
||||||
//
|
//
|
||||||
// There is no need to unmount the new mounts because as soon as the mount namespace
|
// There is no need to unmount the new mounts because as soon as the mount namespace
|
||||||
// is no longer in use, the mounts will be removed automatically
|
// is no longer in use, the mounts will be removed automatically
|
||||||
func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) error {
|
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool) error {
|
||||||
flag := syscall.MS_PRIVATE
|
flag := syscall.MS_PRIVATE
|
||||||
if noPivotRoot {
|
if noPivotRoot {
|
||||||
flag = syscall.MS_SLAVE
|
flag = syscall.MS_SLAVE
|
||||||
|
@ -38,6 +39,23 @@ func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool)
|
||||||
if err := mountSystem(rootfs); err != nil {
|
if err := mountSystem(rootfs); err != nil {
|
||||||
return fmt.Errorf("mount system %s", err)
|
return fmt.Errorf("mount system %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, m := range bindMounts {
|
||||||
|
flags := syscall.MS_BIND | syscall.MS_REC
|
||||||
|
if !m.Writable {
|
||||||
|
flags = flags | syscall.MS_RDONLY
|
||||||
|
}
|
||||||
|
dest := filepath.Join(rootfs, m.Destination)
|
||||||
|
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||||
|
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
||||||
|
}
|
||||||
|
if m.Private {
|
||||||
|
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||||
|
return fmt.Errorf("mounting %s private %s", dest, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err := copyDevNodes(rootfs); err != nil {
|
if err := copyDevNodes(rootfs); err != nil {
|
||||||
return fmt.Errorf("copy dev nodes %s", err)
|
return fmt.Errorf("copy dev nodes %s", err)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue