6b97c80b4d
Some applications want to write to /proc. For instance: docker run -it centos groupadd foo gives: groupadd: failure while writing changes to /etc/group And strace reveals why: open("/proc/self/task/13/attr/fscreate", O_RDWR) = -1 EROFS (Read-only file system) I've looked at what other systems do, and systemd-nspawn makes /proc read-write and /proc/sys readonly, while lxc allows "proc:mixed" which does the same, plus it makes /proc/sysrq-trigger also readonly. The latter seems like a prudent idea, so we follow lxc proc:mixed. Additionally we make /proc/irq and /proc/bus readonly, as these seem to let you control various hardware things. Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
163 lines
4.8 KiB
Go
163 lines
4.8 KiB
Go
// +build linux
|
|
|
|
package nsinit
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
"syscall"
|
|
|
|
"github.com/dotcloud/docker/pkg/apparmor"
|
|
"github.com/dotcloud/docker/pkg/label"
|
|
"github.com/dotcloud/docker/pkg/libcontainer"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/console"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/mount"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/network"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
|
|
"github.com/dotcloud/docker/pkg/libcontainer/utils"
|
|
"github.com/dotcloud/docker/pkg/system"
|
|
"github.com/dotcloud/docker/pkg/user"
|
|
)
|
|
|
|
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
|
|
// and other options required for the new container.
|
|
func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
|
|
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// clear the current processes env and replace it with the environment
|
|
// defined on the container
|
|
if err := LoadContainerEnvironment(container); err != nil {
|
|
return err
|
|
}
|
|
|
|
// We always read this as it is a way to sync with the parent as well
|
|
context, err := syncPipe.ReadFromParent()
|
|
if err != nil {
|
|
syncPipe.Close()
|
|
return err
|
|
}
|
|
syncPipe.Close()
|
|
|
|
if consolePath != "" {
|
|
if err := console.OpenAndDup(consolePath); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if _, err := system.Setsid(); err != nil {
|
|
return fmt.Errorf("setsid %s", err)
|
|
}
|
|
if consolePath != "" {
|
|
if err := system.Setctty(); err != nil {
|
|
return fmt.Errorf("setctty %s", err)
|
|
}
|
|
}
|
|
if err := setupNetwork(container, context); err != nil {
|
|
return fmt.Errorf("setup networking %s", err)
|
|
}
|
|
|
|
label.Init()
|
|
|
|
if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
|
|
return fmt.Errorf("setup mount namespace %s", err)
|
|
}
|
|
if container.Hostname != "" {
|
|
if err := system.Sethostname(container.Hostname); err != nil {
|
|
return fmt.Errorf("sethostname %s", err)
|
|
}
|
|
}
|
|
|
|
runtime.LockOSThread()
|
|
|
|
if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
|
|
return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
|
|
}
|
|
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
|
|
return fmt.Errorf("set process label %s", err)
|
|
}
|
|
if container.Context["restrictions"] != "" {
|
|
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := FinalizeNamespace(container); err != nil {
|
|
return fmt.Errorf("finalize namespace %s", err)
|
|
}
|
|
return system.Execv(args[0], args[0:], container.Env)
|
|
}
|
|
|
|
// SetupUser changes the groups, gid, and uid for the user inside the container
|
|
func SetupUser(u string) error {
|
|
uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid())
|
|
if err != nil {
|
|
return fmt.Errorf("get supplementary groups %s", err)
|
|
}
|
|
if err := system.Setgroups(suppGids); err != nil {
|
|
return fmt.Errorf("setgroups %s", err)
|
|
}
|
|
if err := system.Setgid(gid); err != nil {
|
|
return fmt.Errorf("setgid %s", err)
|
|
}
|
|
if err := system.Setuid(uid); err != nil {
|
|
return fmt.Errorf("setuid %s", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// setupVethNetwork uses the Network config if it is not nil to initialize
|
|
// the new veth interface inside the container for use by changing the name to eth0
|
|
// setting the MTU and IP address along with the default gateway
|
|
func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error {
|
|
for _, config := range container.Networks {
|
|
strategy, err := network.GetStrategy(config.Type)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err1 := strategy.Initialize(config, context)
|
|
if err1 != nil {
|
|
return err1
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// FinalizeNamespace drops the caps, sets the correct user
|
|
// and working dir, and closes any leaky file descriptors
|
|
// before execing the command inside the namespace
|
|
func FinalizeNamespace(container *libcontainer.Container) error {
|
|
if err := capabilities.DropCapabilities(container); err != nil {
|
|
return fmt.Errorf("drop capabilities %s", err)
|
|
}
|
|
if err := system.CloseFdsFrom(3); err != nil {
|
|
return fmt.Errorf("close open file descriptors %s", err)
|
|
}
|
|
if err := SetupUser(container.User); err != nil {
|
|
return fmt.Errorf("setup user %s", err)
|
|
}
|
|
if container.WorkingDir != "" {
|
|
if err := system.Chdir(container.WorkingDir); err != nil {
|
|
return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func LoadContainerEnvironment(container *libcontainer.Container) error {
|
|
os.Clearenv()
|
|
for _, pair := range container.Env {
|
|
p := strings.SplitN(pair, "=", 2)
|
|
if len(p) < 2 {
|
|
return fmt.Errorf("invalid environment '%v'", pair)
|
|
}
|
|
if err := os.Setenv(p[0], p[1]); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|