pkg/libcontainer/nsinit/init.go
Dan Walsh 2b01982d1f This patch adds SELinux labeling support.
docker will run the process(es) within the container with an SELinux label and will label
all of  the content within the container with mount label.  Any temporary file systems
created within the container need to be mounted with the same mount label.

The user can override the process label by specifying

-Z With a string of space separated options.

-Z "user=unconfined_u role=unconfined_r type=unconfined_t level=s0"

Would cause the process label to run with unconfined_u:unconfined_r:unconfined_t:s0"

By default the processes will run execute within the container as svirt_lxc_net_t.
All of the content in the container as svirt_sandbox_file_t.

The process mcs level is based of the PID of the docker process that is creating the container.

If you run the container in --priv mode, the labeling will be disabled.

Docker-DCO-1.1-Signed-off-by: Dan Walsh <dwalsh@redhat.com> (github: rhatdan)
2014-03-26 15:30:40 -04:00

167 lines
5 KiB
Go

// +build linux
package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/apparmor"
"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user"
"os"
"runtime"
"syscall"
)
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error {
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
}
// We always read this as it is a way to sync with the parent as well
ns.logger.Printf("reading from sync pipe fd %d\n", syncPipe.child.Fd())
context, err := syncPipe.ReadFromParent()
if err != nil {
syncPipe.Close()
return err
}
ns.logger.Println("received context from parent")
syncPipe.Close()
if console != "" {
ns.logger.Printf("setting up %s as console\n", console)
slave, err := system.OpenTerminal(console, syscall.O_RDWR)
if err != nil {
return fmt.Errorf("open terminal %s", err)
}
if err := dupSlave(slave); err != nil {
return fmt.Errorf("dup2 slave %s", err)
}
}
if _, err := system.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err)
}
if console != "" {
if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err)
}
}
// this is our best effort to let the process know that the parent has died and that it
// should it should act on it how it sees fit
if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil {
return fmt.Errorf("parent death signal %s", err)
}
ns.logger.Println("setup mount namespace")
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if err := setupNetwork(container, context); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := system.Sethostname(container.Hostname); err != nil {
return fmt.Errorf("sethostname %s", err)
}
if err := finalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
if profile := container.Context["apparmor_profile"]; profile != "" {
ns.logger.Printf("setting apparmor profile %s\n", profile)
if err := apparmor.ApplyProfile(os.Getpid(), profile); err != nil {
return err
}
}
runtime.LockOSThread()
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
return fmt.Errorf("SetProcessLabel label %s", err)
}
ns.logger.Printf("execing %s\n", args[0])
return system.Execv(args[0], args[0:], container.Env)
}
func setupUser(container *libcontainer.Container) error {
switch container.User {
case "root", "":
if err := system.Setgroups(nil); err != nil {
return err
}
if err := system.Setresgid(0, 0, 0); err != nil {
return err
}
if err := system.Setresuid(0, 0, 0); err != nil {
return err
}
default:
uid, gid, suppGids, err := user.GetUserGroupSupplementary(container.User, syscall.Getuid(), syscall.Getgid())
if err != nil {
return err
}
if err := system.Setgroups(suppGids); err != nil {
return err
}
if err := system.Setgid(gid); err != nil {
return err
}
if err := system.Setuid(uid); err != nil {
return err
}
}
return nil
}
// dupSlave dup2 the pty slave's fd into stdout and stdin and ensures that
// the slave's fd is 0, or stdin
func dupSlave(slave *os.File) error {
if err := system.Dup2(slave.Fd(), 0); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 1); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 2); err != nil {
return err
}
return nil
}
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway
func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error {
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
err1 := strategy.Initialize(config, context)
if err1 != nil {
return err1
}
}
return nil
}
// finalizeNamespace drops the caps and sets the correct user
// and working dir before execing the command inside the namespace
func finalizeNamespace(container *libcontainer.Container) error {
if err := capabilities.DropCapabilities(container); err != nil {
return fmt.Errorf("drop capabilities %s", err)
}
if err := setupUser(container); err != nil {
return fmt.Errorf("setup user %s", err)
}
if container.WorkingDir != "" {
if err := system.Chdir(container.WorkingDir); err != nil {
return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
}
}
return nil
}