bump runc@b263a43430ac6996a4302b891688544225197294

Signed-off-by: Antonio Murdaca <runcom@redhat.com>
This commit is contained in:
Antonio Murdaca 2017-02-06 21:16:36 +01:00
parent 73a0881dbb
commit c258a2d8f0
No known key found for this signature in database
GPG key ID: B2BEAD150DE936B9
386 changed files with 9394 additions and 39467 deletions

View file

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/symlink"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label"
@ -25,51 +26,56 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupDev returns true if /dev needs to be set up.
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
if m.Device == "bind" && (m.Destination == "/dev" || m.Destination == "/dev/") {
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
return false
}
}
return true
}
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro or pivot_root,
// because console setup happens inside the caller. You must call
// finalizeRootfs in order to finish the rootfs setup.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "preparing rootfs")
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "running postmount command")
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config, console); err != nil {
return newSystemError(err)
if err := setupPtmx(config); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
@ -77,52 +83,70 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
if err := syncParentHooks(pipe); err != nil {
return err
}
// The reason these operations are done here rather than in finalizeRootfs
// is because the console-handling code gets quite sticky if we have to set
// up the console before doing the pivot_root(2). This is because the
// Console API has to also work with the ExecIn case, which means that the
// API must be able to deal with being inside as well as outside the
// container. It's just cleaner to do this here (at the expense of the
// operation not being perfectly split).
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs, config.PivotDir)
err = pivotRoot(config.Rootfs)
}
if err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
// remount dev as ro if specifed
return nil
}
// finalizeRootfs actually switches the root of the process and sets anything
// to ro if necessary. You must call prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
// remount dev as ro if specified
for _, m := range config.Mounts {
if m.Destination == "/dev" {
if m.Flags&syscall.MS_RDONLY != 0 {
if err := remountReadonly(m.Destination); err != nil {
return newSystemError(err)
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Flags&syscall.MS_RDONLY == syscall.MS_RDONLY {
if err := remountReadonly(m); err != nil {
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
}
}
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
syscall.Umask(0022)
return nil
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
@ -154,15 +178,41 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
return nil
case "tmpfs":
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
tmpDir := ""
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
if copyUp {
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
if err != nil {
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
}
defer os.RemoveAll(tmpDir)
m.Destination = tmpDir
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if copyUp {
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
if err := syscall.Mount(tmpDir, dest, "", syscall.MS_MOVE, ""); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
@ -240,34 +290,23 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
return err
}
}
// create symlinks for merged cgroups
cwd, err := os.Getwd()
if err != nil {
return err
}
if err := os.Chdir(filepath.Join(rootfs, m.Destination)); err != nil {
return err
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
if err := os.Symlink(mc, ss); err != nil {
// if cgroup already exists, then okay(it could have been created before)
if os.IsExist(err) {
continue
}
os.Chdir(cwd)
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
return err
}
}
}
if err := os.Chdir(cwd); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY,
Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
@ -283,7 +322,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
mounts, err := cgroups.GetCgroupMounts()
mounts, err := cgroups.GetCgroupMounts(false)
if err != nil {
return nil, err
}
@ -307,7 +346,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
@ -319,9 +358,6 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
if libcontainerUtils.CleanPath(rootfs) == libcontainerUtils.CleanPath(dest) {
return fmt.Errorf("mounting into / is prohibited")
}
invalidDestinations := []string{
"/proc",
}
@ -333,6 +369,8 @@ func checkMountDestination(rootfs, dest string) error {
"/proc/diskstats",
"/proc/meminfo",
"/proc/stat",
"/proc/swaps",
"/proc/uptime",
"/proc/net/dev",
}
for _, valid := range validDestinations {
@ -515,10 +553,10 @@ func getParentMount(rootfs string) (string, string, error) {
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(config *configs.Config) error {
func rootfsParentMountPrivate(rootfs string) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
parentMount, optionalOpts, err := getParentMount(rootfs)
if err != nil {
return err
}
@ -551,7 +589,10 @@ func prepareRoot(config *configs.Config) error {
return err
}
if err := rootfsParentMountPrivate(config); err != nil {
// Make parent mount private to make sure following bind mount does
// not propagate in other namespaces. Also it will help with kernel
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
@ -562,7 +603,7 @@ func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
func setupPtmx(config *configs.Config, console *linuxConsole) error {
func setupPtmx(config *configs.Config) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
@ -570,47 +611,61 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel)
}
return nil
}
func pivotRoot(rootfs, pivotBaseDir string) (err error) {
if pivotBaseDir == "" {
pivotBaseDir = "/"
}
tmpDir := filepath.Join(rootfs, pivotBaseDir)
if err := os.MkdirAll(tmpDir, 0755); err != nil {
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
}
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
}
defer func() {
errVal := os.Remove(pivotDir)
if err == nil {
err = errVal
}
}()
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
// pivotRoot will call pivot_root such that rootfs becomes the new root
// filesystem, and everything else is cleaned up.
func pivotRoot(rootfs string) error {
// While the documentation may claim otherwise, pivot_root(".", ".") is
// actually valid. What this results in is / being the new root but
// /proc/self/cwd being the old root. Since we can play around with the cwd
// with pivot_root this allows us to pivot without creating directories in
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
// Make pivotDir rprivate to make sure any of the unmounts don't
// propagate to parent.
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
oldroot, err := syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
if err != nil {
return err
}
defer syscall.Close(oldroot)
newroot, err := syscall.Open(rootfs, syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
if err != nil {
return err
}
defer syscall.Close(newroot)
// Change to the new root so that the pivot_root actually acts on it.
if err := syscall.Fchdir(newroot); err != nil {
return err
}
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
if err := syscall.PivotRoot(".", "."); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
if err := syscall.Fchdir(oldroot); err != nil {
return err
}
// Make oldroot rprivate to make sure our unmounts don't propagate to the
// host (and thus bork the machine).
if err := syscall.Mount("", ".", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
return err
}
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
return err
}
// Switch back to our shiny new root.
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
return nil
}
@ -645,17 +700,26 @@ func createIfNotExists(path string, isDir bool) error {
return nil
}
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
func remountReadonly(path string) error {
// readonlyPath will make a path read only.
func readonlyPath(path string) error {
if err := syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
// remountReadonly will remount an existing mount point and ensure that it is read-only.
func remountReadonly(m *configs.Mount) error {
var (
dest = m.Destination
flags = m.Flags
)
for i := 0; i < 5; i++ {
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
if err := syscall.Mount("", dest, "", uintptr(flags|syscall.MS_REMOUNT|syscall.MS_RDONLY), ""); err != nil {
switch err {
case syscall.EINVAL:
// Probably not a mountpoint, use bind-mount
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
case syscall.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
@ -665,13 +729,19 @@ func remountReadonly(path string) error {
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
}
// maskFile bind mounts /dev/null over the top of the specified path inside a container
// to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ).
func maskFile(path string) error {
// maskPath masks the top of the specified path inside a container to avoid
// security issues from processes reading information from non-namespace aware
// mounts ( proc/kcore ).
// For files, maskPath bind mounts /dev/null over the top of the specified path.
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
func maskPath(path string) error {
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
if err == syscall.ENOTDIR {
return syscall.Mount("tmpfs", path, "tmpfs", syscall.MS_RDONLY, "")
}
return err
}
return nil
@ -705,10 +775,12 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
if dest == "/dev" {
if libcontainerUtils.CleanPath(dest) == "/dev" {
flags &= ^syscall.MS_RDONLY
}
if !strings.HasPrefix(dest, rootfs) {
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
dest = filepath.Join(rootfs, dest)
}