Bump up runtime-spec dependency to v1.0.0

Signed-off-by: Mrunal Patel <mpatel@redhat.com>
2017-07-19 21:07:01 -07:00 · 2017-07-19 21:07:01 -07:00 · 4128bbd7dc
commit 4128bbd7dc
parent 0eb5cd527f
83 changed files with 1020 additions and 14970 deletions
--- a/vendor/github.com/opencontainers/runc/README.md
+++ b/vendor/github.com/opencontainers/runc/README.md
@ -77,6 +77,12 @@ You can run a specific test case by setting the `TESTFLAGS` variable.
 # make test TESTFLAGS="-run=SomeTestFunction"
 ```

+### Dependencies Management
+
+`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependency management.
+Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add new
+dependencies or update existing ones.
+
 ## Using runc

 ### Creating an OCI Bundle
@ -111,8 +117,8 @@ Assuming you have an OCI bundle from the previous step you can execute the conta
 The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits.

 ```bash
+# run as root
 cd /mycontainer
-
 runc run mycontainerid
 ```

@ -159,8 +165,8 @@ Now we can go though the lifecycle operations in your shell.


 ```bash
+# run as root
 cd /mycontainer
-
 runc create mycontainerid

 # view the container is created and in the "created" state
@ -179,6 +185,22 @@ runc delete mycontainerid
 This adds more complexity but allows higher level systems to manage runc and provides points in the containers creation to setup various settings after the container has created and/or before it is deleted.
 This is commonly used to setup the container's network stack after `create` but before `start` where the user's defined process will be running.

+#### Rootless containers
+`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user:
+```bash
+# Same as the first example
+mkdir ~/mycontainer
+cd ~/mycontainer
+mkdir rootfs
+docker export $(docker create busybox) | tar -C rootfs -xvf -
+
+# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user.
+runc spec --rootless
+
+# The --root parameter tells runc where to store the container state. It must be writable by the user.
+runc --root /tmp/runc run mycontainerid
+```
+
 #### Supervisors

 `runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit.
--- a/vendor/github.com/opencontainers/runc/libcontainer/README.md
+++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md
@ -56,25 +56,91 @@ Once you have an instance of the factory created we can create a configuration
 struct describing how the container is to be created. A sample would look similar to this:

 ```go
-defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
+defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
 config := &configs.Config{
 	Rootfs: "/your/path/to/rootfs",
-	Capabilities: []string{
-		"CAP_CHOWN",
-		"CAP_DAC_OVERRIDE",
-		"CAP_FSETID",
-		"CAP_FOWNER",
-		"CAP_MKNOD",
-		"CAP_NET_RAW",
-		"CAP_SETGID",
-		"CAP_SETUID",
-		"CAP_SETFCAP",
-		"CAP_SETPCAP",
-		"CAP_NET_BIND_SERVICE",
-		"CAP_SYS_CHROOT",
-		"CAP_KILL",
-		"CAP_AUDIT_WRITE",
-	},
+	Capabilities: &configs.Capabilities{
+                Bounding: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Effective: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Inheritable: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Permitted: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Ambient: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+        },
 	Namespaces: configs.Namespaces([]configs.Namespace{
 		{Type: configs.NEWNS},
 		{Type: configs.NEWUTS},
@ -112,14 +178,14 @@ config := &configs.Config{
 			Source:      "tmpfs",
 			Destination: "/dev",
 			Device:      "tmpfs",
-			Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
+			Flags:       unix.MS_NOSUID | unix.MS_STRICTATIME,
 			Data:        "mode=755",
 		},
 		{
 			Source:      "devpts",
 			Destination: "/dev/pts",
 			Device:      "devpts",
-			Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
+			Flags:       unix.MS_NOSUID | unix.MS_NOEXEC,
 			Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
 		},
 		{
@ -139,7 +205,7 @@ config := &configs.Config{
 			Source:      "sysfs",
 			Destination: "/sys",
 			Device:      "sysfs",
-			Flags:       defaultMountFlags | syscall.MS_RDONLY,
+			Flags:       defaultMountFlags | unix.MS_RDONLY,
 		},
 	},
 	UidMappings: []configs.IDMap{
@ -165,7 +231,7 @@ config := &configs.Config{
 	},
 	Rlimits: []configs.Rlimit{
 		{
-			Type: syscall.RLIMIT_NOFILE,
+			Type: unix.RLIMIT_NOFILE,
 			Hard: uint64(1025),
 			Soft: uint64(1025),
 		},
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
 	}, nil
 }

-func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
-	// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
-	// process could in container and shared pid namespace with host, and
-	// /proc/1/cgroup could point to whole other world of cgroups.
-	initPath, err := cgroups.GetThisCgroupDir(subsystem)
-	if err != nil {
-		return "", err
-	}
-	// This is needed for nested containers, because in /proc/self/cgroup we
-	// see pathes from host, which don't exist in container.
-	relDir, err := filepath.Rel(root, initPath)
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(mountpoint, relDir), nil
-}
-
 func (raw *cgroupData) path(subsystem string) (string, error) {
-	mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
+	mnt, err := cgroups.FindCgroupMountpoint(subsystem)
 	// If we didn't mount the subsystem, there is no point we make the path.
 	if err != nil {
 		return "", err
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
 		return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
 	}

-	parentPath, err := raw.parentPath(subsystem, mnt, root)
+	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
+	// process could be in a container that shares the pid namespace with the
+	// host, and /proc/1/cgroup could point to a whole other world of cgroups.
+	parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
 	if err != nil {
 		return "", err
 	}
@ -346,8 +332,8 @@ func removePath(p string, err error) error {
 	return nil
 }

-func CheckCpushares(path string, c int64) error {
-	var cpuShares int64
+func CheckCpushares(path string, c uint64) error {
+	var cpuShares uint64

 	if c == 0 {
 		return nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@ -55,7 +55,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error

 func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
 	if cgroup.Resources.CpuRtPeriod != 0 {
-		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
+		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
 			return err
 		}
 	}
@ -69,12 +69,12 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {

 func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
 	if cgroup.Resources.CpuShares != 0 {
-		if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
+		if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
 			return err
 		}
 	}
 	if cgroup.Resources.CpuPeriod != 0 {
-		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
+		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
 			return err
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@ -57,10 +57,11 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
 	if dir == "" {
 		return nil
 	}
-	root, err := getCgroupRoot()
+	mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
 	if err != nil {
 		return err
 	}
+	root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
 	// 'ensureParent' start with parent because we don't want to
 	// explicitly inherit from parent, it could conflict with
 	// 'cpuset.cpu_exclusive'.
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@ -10,13 +10,19 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
-	"syscall"
+	"syscall" // only for Errno

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
 )

-const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+const (
+	cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+	cgroupMemorySwapLimit   = "memory.memsw.limit_in_bytes"
+	cgroupMemoryLimit       = "memory.limit_in_bytes"
+)

 type MemoryGroup struct {
 }
@ -29,14 +35,18 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	path, err := d.path("memory")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
+	} else if path == "" {
+		return nil
 	}
 	if memoryAssigned(d.config) {
-		if path != "" {
+		if _, err := os.Stat(path); os.IsNotExist(err) {
 			if err := os.MkdirAll(path, 0755); err != nil {
 				return err
 			}
-		}
-		if d.config.KernelMemory != 0 {
+			// Only enable kernel memory accounting when this cgroup
+			// is created by libcontainer, otherwise we might get
+			// an error when people use `cgroupsPath` to join an existing
+			// cgroup whose kernel memory is not initialized.
 			if err := EnableKernelMemoryAccounting(path); err != nil {
 				return err
 			}
@ -85,7 +95,7 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
 		// once tasks have been attached to the cgroup
 		if pathErr, ok := err.(*os.PathError); ok {
 			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
-				if errNo == syscall.EBUSY {
+				if errNo == unix.EBUSY {
 					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
 				}
 			}
@ -96,9 +106,18 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
 }

 func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
+	// If the memory limit is being set to -1 (unlimited), we should also
+	// set the swap limit to -1.
+	if cgroup.Resources.Memory == -1 {
+		// Only set swap if it's enabled in kernel
+		if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
+			cgroup.Resources.MemorySwap = -1
+		}
+	}
+
 	// When memory and swap memory are both set, we need to handle the cases
 	// for updating container.
-	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
+	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
 		memoryUsage, err := getMemoryData(path, "")
 		if err != nil {
 			return err
@ -107,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
 		// When update memory limit, we should adapt the write sequence
 		// for memory and swap memory, so it won't fail because the new
 		// value and the old value don't fit kernel's validation.
-		if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+		if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 		} else {
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 		}
 	} else {
 		if cgroup.Resources.Memory != 0 {
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 		}
-		if cgroup.Resources.MemorySwap > 0 {
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+		if cgroup.Resources.MemorySwap != 0 {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 		}
@ -167,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 	}
 	if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
 		return nil
-	} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
-		if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
+	} else if *cgroup.Resources.MemorySwappiness <= 100 {
+		if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
 			return err
 		}
 	} else {
-		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
+		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
 	}

 	return nil
@ -224,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 	}
 	stats.MemoryStats.KernelTCPUsage = kernelTCPUsage

+	useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
+	value, err := getCgroupParamUint(path, useHierarchy)
+	if err != nil {
+		return err
+	}
+	if value == 1 {
+		stats.MemoryStats.UseHierarchy = true
+	}
 	return nil
 }

@ -234,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
 		cgroup.Resources.KernelMemory > 0 ||
 		cgroup.Resources.KernelMemoryTCP > 0 ||
 		cgroup.Resources.OomKillDisable ||
-		(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
+		(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
 }

 func getMemoryData(path, name string) (cgroups.MemoryData, error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@ -51,6 +51,8 @@ type MemoryStats struct {
 	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
 	// usage of kernel TCP memory
 	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
+	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
+	UseHierarchy bool `json:"use_hierarchy"`

 	Stats map[string]uint64 `json:"stats,omitempty"`
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@ -5,7 +5,6 @@ package systemd
 import (
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
@ -261,12 +260,19 @@ func (m *Manager) Apply(pid int) error {

 	if c.Resources.Memory != 0 {
 		properties = append(properties,
-			newProp("MemoryLimit", uint64(c.Resources.Memory)))
+			newProp("MemoryLimit", c.Resources.Memory))
 	}

 	if c.Resources.CpuShares != 0 {
 		properties = append(properties,
-			newProp("CPUShares", uint64(c.Resources.CpuShares)))
+			newProp("CPUShares", c.Resources.CpuShares))
+	}
+
+	// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+	if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
+		cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+		properties = append(properties,
+			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
 	}

 	if c.Resources.BlkioWeight != 0 {
@ -327,15 +333,6 @@ func (m *Manager) GetPaths() map[string]string {
 	return paths
 }

-func writeFile(dir, file, data string) error {
-	// Normally dir should not be empty, one case is that cgroup subsystem
-	// is not mounted, we will get empty dir, and we want it fail here.
-	if dir == "" {
-		return fmt.Errorf("no such directory for %s", file)
-	}
-	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
-}
-
 func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
 	path, err := getSubsystemPath(c, subsystem)
 	if err != nil {
@ -429,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
 		return "", err
 	}

-	initPath, err := cgroups.GetInitCgroupDir(subsystem)
+	initPath, err := cgroups.GetInitCgroup(subsystem)
 	if err != nil {
 		return "", err
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@ -66,6 +66,21 @@ func isSubsystemAvailable(subsystem string) bool {
 	return avail
 }

+func GetClosestMountpointAncestor(dir, mountinfo string) string {
+	deepestMountPoint := ""
+	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
+		mountInfoParts := strings.Fields(mountInfoEntry)
+		if len(mountInfoParts) < 5 {
+			continue
+		}
+		mountPoint := mountInfoParts[4]
+		if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
+			deepestMountPoint = mountPoint
+		}
+	}
+	return deepestMountPoint
+}
+
 func FindCgroupMountpointDir() (string, error) {
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
@ -109,7 +124,7 @@ type Mount struct {
 	Subsystems []string
 }

-func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
 	if len(m.Subsystems) == 0 {
 		return "", fmt.Errorf("no subsystem for mount")
 	}
@ -203,8 +218,8 @@ func GetAllSubsystems() ([]string, error) {
 	return subsystems, nil
 }

-// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
-func GetThisCgroupDir(subsystem string) (string, error) {
+// GetOwnCgroup returns the relative path to the cgroup docker is running in.
+func GetOwnCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return "", err
@ -213,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

-func GetInitCgroupDir(subsystem string) (string, error) {
+func GetOwnCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetOwnCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}

+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func GetInitCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
 	if err != nil {
 		return "", err
@ -223,6 +246,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

+func GetInitCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetInitCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
+	mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	// This is needed for nested containers, because in /proc/self/cgroup we
+	// see paths from the host, which don't exist in the container.
+	relCgroup, err := filepath.Rel(root, cgroup)
+	if err != nil {
+		return "", err
+	}
+
+	return filepath.Join(mnt, relCgroup), nil
+}
+
 func readProcsFile(dir string) ([]int, error) {
 	f, err := os.Open(filepath.Join(dir, CgroupProcesses))
 	if err != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package configs

 type FreezerState string
@ -60,19 +58,19 @@ type Resources struct {
 	KernelMemoryTCP int64 `json:"kernel_memory_tcp"`

 	// CPU shares (relative weight vs. other containers)
-	CpuShares int64 `json:"cpu_shares"`
+	CpuShares uint64 `json:"cpu_shares"`

 	// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
 	CpuQuota int64 `json:"cpu_quota"`

 	// CPU period to be used for hardcapping (in usecs). 0 to use system default.
-	CpuPeriod int64 `json:"cpu_period"`
+	CpuPeriod uint64 `json:"cpu_period"`

 	// How many time CPU will use in realtime scheduling (in usecs).
 	CpuRtRuntime int64 `json:"cpu_rt_quota"`

 	// CPU period to be used for realtime scheduling (in usecs).
-	CpuRtPeriod int64 `json:"cpu_rt_period"`
+	CpuRtPeriod uint64 `json:"cpu_rt_period"`

 	// CPU to use
 	CpusetCpus string `json:"cpuset_cpus"`
@ -114,7 +112,7 @@ type Resources struct {
 	OomKillDisable bool `json:"oom_kill_disable"`

 	// Tuning swappiness behaviour per cgroup
-	MemorySwappiness *int64 `json:"memory_swappiness"`
+	MemorySwappiness *uint64 `json:"memory_swappiness"`

 	// Set priority of network traffic for container
 	NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@ -113,8 +113,8 @@ type Config struct {
 	Namespaces Namespaces `json:"namespaces"`

 	// Capabilities specify the capabilities to keep when executing the process inside the container
-	// All capbilities not specified will be dropped from the processes capability mask
-	Capabilities []string `json:"capabilities"`
+	// All capabilities not specified will be dropped from the processes capability mask
+	Capabilities *Capabilities `json:"capabilities"`

 	// Networks specifies the container's network setup to be created
 	Networks []*Network `json:"networks"`
@ -183,6 +183,9 @@ type Config struct {
 	// NoNewKeyring will not allocated a new session keyring for the container.  It will use the
 	// callers keyring in this case.
 	NoNewKeyring bool `json:"no_new_keyring"`
+
+	// Rootless specifies whether the container is a rootless container.
+	Rootless bool `json:"rootless"`
 }

 type Hooks struct {
@ -197,6 +200,19 @@ type Hooks struct {
 	Poststop []Hook
 }

+type Capabilities struct {
+	// Bounding is the capability bounding set, which limits the capabilities that can be gained during execve.
+	Bounding []string
+	// Effective is the set of capabilities checked by the kernel.
+	Effective []string
+	// Inheritable is the capabilities preserved across execve.
+	Inheritable []string
+	// Permitted is the limiting superset for effective capabilities.
+	Permitted []string
+	// Ambient is the ambient set of capabilities that are kept.
+	Ambient []string
+}
+
 func (hooks *Hooks) UnmarshalJSON(b []byte) error {
 	var state struct {
 		Prestart  []CommandHook
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@ -0,0 +1,61 @@
+package configs
+
+import "fmt"
+
+// HostUID gets the translated uid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostUID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.UidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.UidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootUID gets the root uid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootUID() (int, error) {
+	return c.HostUID(0)
+}
+
+// HostGID gets the translated gid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostGID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.GidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.GidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootGID gets the root gid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootGID() (int, error) {
+	return c.HostGID(0)
+}
+
+// Utility function that gets a host ID for a container ID from user namespace map
+// if that ID is present in the map.
+func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
+	for _, m := range uMap {
+		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
+			hostID := m.HostID + (containerID - m.ContainerID)
+			return hostID, true
+		}
+	}
+	return -1, false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
@ -1,51 +0,0 @@
-// +build freebsd linux
-
-package configs
-
-import "fmt"
-
-// HostUID gets the root uid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostUID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.UidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.UidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root uid 0
-	return 0, nil
-}
-
-// HostGID gets the root gid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostGID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.GidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.GidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root gid 0
-	return 0, nil
-}
-
-// Utility function that gets a host ID for a container ID from user namespace map
-// if that ID is present in the map.
-func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
-	for _, m := range uMap {
-		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
-			hostID := m.HostID + (containerID - m.ContainerID)
-			return hostID, true
-		}
-	}
-	return -1, false
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package configs

 import (
@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {

 func NamespaceTypes() []NamespaceType {
 	return []NamespaceType{
+		NEWUSER, // Keep user NS always first, don't move it.
+		NEWIPC,
+		NEWUTS,
 		NEWNET,
 		NEWPID,
 		NEWNS,
-		NEWUTS,
-		NEWIPC,
-		NEWUSER,
 	}
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@ -2,19 +2,19 @@

 package configs

-import "syscall"
+import "golang.org/x/sys/unix"

 func (n *Namespace) Syscall() int {
 	return namespaceInfo[n.Type]
 }

 var namespaceInfo = map[NamespaceType]int{
-	NEWNET:  syscall.CLONE_NEWNET,
-	NEWNS:   syscall.CLONE_NEWNS,
-	NEWUSER: syscall.CLONE_NEWUSER,
-	NEWIPC:  syscall.CLONE_NEWIPC,
-	NEWUTS:  syscall.CLONE_NEWUTS,
-	NEWPID:  syscall.CLONE_NEWPID,
+	NEWNET:  unix.CLONE_NEWNET,
+	NEWNS:   unix.CLONE_NEWNS,
+	NEWUSER: unix.CLONE_NEWUSER,
+	NEWIPC:  unix.CLONE_NEWIPC,
+	NEWUTS:  unix.CLONE_NEWUTS,
+	NEWPID:  unix.CLONE_NEWPID,
 }

 // CloneFlags parses the container's Namespaces options to set the correct
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
@ -1,4 +1,4 @@
-// +build !linux,!freebsd
+// +build !linux

 package configs

--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package devices

 import (
@ -8,9 +6,11 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	"syscall"
+	"syscall" //only for Stat_t

 	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
 )

 var (
@ -38,10 +38,10 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
 	case mode&os.ModeDevice == 0:
 		return nil, ErrNotADevice
 	case mode&os.ModeCharDevice != 0:
-		fileModePermissionBits |= syscall.S_IFCHR
+		fileModePermissionBits |= unix.S_IFCHR
 		devType = 'c'
 	default:
-		fileModePermissionBits |= syscall.S_IFBLK
+		fileModePermissionBits |= unix.S_IFBLK
 		devType = 'b'
 	}
 	stat_t, ok := fileInfo.Sys().(*syscall.Stat_t)
@ -75,7 +75,8 @@ func getDevices(path string) ([]*configs.Device, error) {
 		switch {
 		case f.IsDir():
 			switch f.Name() {
-			case "pts", "shm", "fd", "mqueue":
+			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
+			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
 				continue
 			default:
 				sub, err := getDevices(filepath.Join(path, f.Name()))
--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go
@ -1,3 +1,3 @@
-// +build windows
+// +build !linux

 package devices
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
@ -33,7 +33,8 @@ enum sync_t {
 	SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
 	SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
 	SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
-	SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
+	SYNC_GRANDCHILD  = 0x44, /* The grandchild is ready to run. */
+	SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */

 	/* XXX: This doesn't help with segfaults and other such issues. */
 	SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
@ -71,18 +72,23 @@ struct nlconfig_t {
 	char *namespaces;
 	size_t namespaces_len;
 	uint8_t is_setgroup;
+	uint8_t is_rootless;
+	char *oom_score_adj;
+	size_t oom_score_adj_len;
 };

 /*
 * List of netlink message types sent to us as part of bootstrapping the init.
 * These constants are defined in libcontainer/message_linux.go.
 */
-#define INIT_MSG		62000
+#define INIT_MSG			62000
 #define CLONE_FLAGS_ATTR	27281
 #define NS_PATHS_ATTR		27282
-#define UIDMAP_ATTR		27283
-#define GIDMAP_ATTR		27284
+#define UIDMAP_ATTR			27283
+#define GIDMAP_ATTR			27284
 #define SETGROUP_ATTR		27285
+#define OOM_SCORE_ADJ_ATTR	27286
+#define ROOTLESS_ATTR	    27287

 /*
 * Use the raw syscall for versions of glibc which don't include a function for
@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
 			policy = "deny";
 			break;
 		case SETGROUPS_DEFAULT:
+		default:
 			/* Nothing to do. */
 			return;
 	}
@ -185,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
 	}
 }

-static void update_uidmap(int pid, char *map, int map_len)
+static void update_uidmap(int pid, char *map, size_t map_len)
 {
 	if (map == NULL || map_len <= 0)
 		return;
@ -194,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
 		bail("failed to update /proc/%d/uid_map", pid);
 }

-static void update_gidmap(int pid, char *map, int map_len)
+static void update_gidmap(int pid, char *map, size_t map_len)
 {
 	if (map == NULL || map_len <= 0)
 		return;
@ -203,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
 		bail("failed to update /proc/%d/gid_map", pid);
 }

+static void update_oom_score_adj(char *data, size_t len)
+{
+	if (data == NULL || len <= 0)
+		return;
+
+	if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
+		bail("failed to update /proc/self/oom_score_adj");
+}
+
 /* A dummy function that just jumps to the given jumpval. */
 static int child_func(void *arg) __attribute__ ((noinline));
 static int child_func(void *arg)
@ -284,7 +300,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 	/* Retrieve the netlink header. */
 	len = read(fd, &hdr, NLMSG_HDRLEN);
 	if (len != NLMSG_HDRLEN)
-		bail("invalid netlink header length %lu", len);
+		bail("invalid netlink header length %zu", len);

 	if (hdr.nlmsg_type == NLMSG_ERROR)
 		bail("failed to read netlink message");
@ -300,7 +316,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)

 	len = read(fd, data, size);
 	if (len != size)
-		bail("failed to read netlink payload, %lu != %lu", len, size);
+		bail("failed to read netlink payload, %zu != %zu", len, size);

 	/* Parse the netlink payload. */
 	config->data = data;
@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 		case CLONE_FLAGS_ATTR:
 			config->cloneflags = readint32(current);
 			break;
+		case ROOTLESS_ATTR:
+			config->is_rootless = readint8(current);
+			break;
+		case OOM_SCORE_ADJ_ATTR:
+			config->oom_score_adj = current;
+			config->oom_score_adj_len = payload_len;
+			break;
 		case NS_PATHS_ATTR:
 			config->namespaces = current;
 			config->namespaces_len = payload_len;
@ -413,7 +436,7 @@ void nsexec(void)
 {
 	int pipenum;
 	jmp_buf env;
-	int syncpipe[2];
+	int sync_child_pipe[2], sync_grandchild_pipe[2];
 	struct nlconfig_t config = {0};

 	/*
@ -424,18 +447,43 @@ void nsexec(void)
 	if (pipenum == -1)
 		return;

-	/* make the process non-dumpable */
-	if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
-		bail("failed to set process as non-dumpable");
-	}
-
 	/* Parse all of the netlink configuration. */
 	nl_parse(pipenum, &config);

+	/* Set oom_score_adj. This has to be done before !dumpable because
+	 * /proc/self/oom_score_adj is not writeable unless you're a privileged
+	 * user (if !dumpable is set). All children inherit their parent's
+	 * oom_score_adj value on fork(2) so this will always be propagated
+	 * properly.
+	 */
+	update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
+
+	/*
+	 * Make the process non-dumpable, to avoid various race conditions that
+	 * could cause processes in namespaces we're joining to access host
+	 * resources (or potentially execute code).
+	 *
+	 * However, if the number of namespaces we are joining is 0, we are not
+	 * going to be switching to a different security context. Thus setting
+	 * ourselves to be non-dumpable only breaks things (like rootless
+	 * containers); not doing so is what the kernel folks recommend.
+	 */
+	if (config.namespaces) {
+		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+			bail("failed to set process as non-dumpable");
+	}
+
 	/* Pipe so we can tell the child when we've finished setting up. */
-	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
 		bail("failed to setup sync pipe between parent and child");

+	/*
+	 * We need a new socketpair to sync with the grandchild so we don't have
+	 * a race condition with the child.
+	 */
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
+		bail("failed to setup sync pipe between parent and grandchild");
+
 	/* TODO: Currently we aren't dealing with child deaths properly. */

 	/*
@ -494,9 +542,10 @@ void nsexec(void)
 	 *          process.
 	 */
 	case JUMP_PARENT: {
-			int len, ready = 0;
+			int len;
 			pid_t child;
 			char buf[JSON_MAX];
+			bool ready = false;

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
@ -513,30 +562,39 @@ void nsexec(void)
 			 * ready, so we can receive all possible error codes
 			 * generated by children.
 			 */
-			while (ready < 2) {
+			while (!ready) {
 				enum sync_t s;
+				int ret;

-				/* This doesn't need to be global, we're in the parent. */
-				int syncfd = syncpipe[1];
+				syncfd = sync_child_pipe[1];
+				close(sync_child_pipe[0]);

 				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with child: next state");

 				switch (s) {
-				case SYNC_ERR: {
-						/* We have to mirror the error code of the child. */
-						int ret;
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");

-						if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
-							bail("failed to sync with child: read(error code)");
-
-						exit(ret);
-					}
-					break;
+					exit(ret);
 				case SYNC_USERMAP_PLS:
-					/* Enable setgroups(2) if we've been asked to. */
+					/*
+					 * Enable setgroups(2) if we've been asked to. But we also
+					 * have to explicitly disable setgroups(2) if we're
+					 * creating a rootless container (this is required since
+					 * Linux 3.19).
+					 */
+					if (config.is_rootless && config.is_setgroup) {
+						kill(child, SIGKILL);
+						bail("cannot allow setgroup in an unprivileged user namespace setup");
+					}
+
 					if (config.is_setgroup)
 						update_setgroups(child, SETGROUPS_ALLOW);
+					if (config.is_rootless)
+						update_setgroups(child, SETGROUPS_DENY);

 					/* Set up mappings. */
 					update_uidmap(child, config.uidmap, config.uidmap_len);
@ -548,11 +606,6 @@ void nsexec(void)
 						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
 					}
 					break;
-				case SYNC_USERMAP_ACK:
-					/* We should _never_ receive acks. */
-					kill(child, SIGKILL);
-					bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
-					break;
 				case SYNC_RECVPID_PLS: {
 						pid_t old = child;

@ -570,20 +623,46 @@ void nsexec(void)
 							bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
 						}
 					}
-
-					ready++;
-					break;
-				case SYNC_RECVPID_ACK:
-					/* We should _never_ receive acks. */
-					kill(child, SIGKILL);
-					bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
 					break;
 				case SYNC_CHILD_READY:
-					ready++;
+					ready = true;
 					break;
 				default:
-					bail("unexpected sync value");
+					bail("unexpected sync value: %u", s);
+				}
+			}
+
+			/* Now sync with grandchild. */
+
+			ready = false;
+			while (!ready) {
+				enum sync_t s;
+				int ret;
+
+				syncfd = sync_grandchild_pipe[1];
+				close(sync_grandchild_pipe[0]);
+
+				s = SYNC_GRANDCHILD;
+				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+					kill(child, SIGKILL);
+					bail("failed to sync with child: write(SYNC_GRANDCHILD)");
+				}
+
+				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with child: next state");
+
+				switch (s) {
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");
+
+					exit(ret);
+				case SYNC_CHILD_READY:
+					ready = true;
 					break;
+				default:
+					bail("unexpected sync value: %u", s);
 				}
 			}

@ -615,7 +694,8 @@ void nsexec(void)
 			enum sync_t s;

 			/* We're in a child and thus need to tell the parent if we die. */
-			syncfd = syncpipe[0];
+			syncfd = sync_child_pipe[0];
+			close(sync_child_pipe[1]);

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
@ -653,6 +733,11 @@ void nsexec(void)
 				 * clone_parent rant). So signal our parent to hook us up.
 				 */

+				/* Switching is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
+						bail("failed to set process as dumpable");
+				}
 				s = SYNC_USERMAP_PLS;
 				if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
@ -663,6 +748,11 @@ void nsexec(void)
 					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
 				if (s != SYNC_USERMAP_ACK)
 					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
+				/* Switching is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+						bail("failed to set process as dumpable");
+				}
 			}

 			/*
@ -700,6 +790,12 @@ void nsexec(void)
 				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
 			}

+			s = SYNC_CHILD_READY;
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: write(SYNC_CHILD_READY)");
+			}
+
 			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
 			exit(0);
 		}
@ -718,11 +814,19 @@ void nsexec(void)
 			enum sync_t s;

 			/* We're in a child and thus need to tell the parent if we die. */
-			syncfd = syncpipe[0];
+			syncfd = sync_grandchild_pipe[0];
+			close(sync_grandchild_pipe[1]);
+			close(sync_child_pipe[0]);
+			close(sync_child_pipe[1]);

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);

+			if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+				bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
+			if (s != SYNC_GRANDCHILD)
+				bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
+
 			if (setsid() < 0)
 				bail("setsid failed");

@ -732,16 +836,17 @@ void nsexec(void)
 			if (setgid(0) < 0)
 				bail("setgid failed");

-			if (setgroups(0, NULL) < 0)
-				bail("setgroups failed");
+			if (!config.is_rootless && config.is_setgroup) {
+				if (setgroups(0, NULL) < 0)
+					bail("setgroups failed");
+			}

 			s = SYNC_CHILD_READY;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 				bail("failed to sync with patent: write(SYNC_CHILD_READY)");

 			/* Close sync pipes. */
-			close(syncpipe[0]);
-			close(syncpipe[1]);
+			close(sync_grandchild_pipe[0]);

 			/* Free netlink data. */
 			nl_free(&config);
@ -751,7 +856,6 @@ void nsexec(void)
 		}
 	default:
 		bail("unexpected jump value");
-		break;
 	}

 	/* Should never be reached. */
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
@ -7,8 +7,10 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
-	"syscall"
+	"syscall" // only for exec
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

 // If arg2 is nonzero, set the "child subreaper" attribute of the
@ -53,8 +55,8 @@ func Execv(cmd string, args []string, env []string) error {
 	return syscall.Exec(name, args, env)
 }

-func Prlimit(pid, resource int, limit syscall.Rlimit) error {
-	_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
+func Prlimit(pid, resource int, limit unix.Rlimit) error {
+	_, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
 	if err != 0 {
 		return err
 	}
@ -62,7 +64,7 @@ func Prlimit(pid, resource int, limit syscall.Rlimit) error {
 }

 func SetParentDeathSignal(sig uintptr) error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
 		return err
 	}
 	return nil
@ -70,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {

 func GetParentDeathSignal() (ParentDeathSignal, error) {
 	var sig int
-	_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
-	if err != 0 {
+	if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
 		return -1, err
 	}
 	return ParentDeathSignal(sig), nil
 }

 func SetKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
 		return err
 	}

@ -86,7 +87,7 @@ func SetKeepCaps() error {
 }

 func ClearKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
 		return err
 	}

@ -94,7 +95,7 @@ func ClearKeepCaps() error {
 }

 func Setctty() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
+	if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
 		return err
 	}
 	return nil
@ -131,13 +132,5 @@ func RunningInUserNS() bool {

 // SetSubreaper sets the value i as the subreaper setting for the calling process
 func SetSubreaper(i int) error {
-	return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
-}
-
-func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
-	_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
+	return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
@ -1,43 +1,113 @@
 package system

 import (
+	"fmt"
 	"io/ioutil"
 	"path/filepath"
 	"strconv"
 	"strings"
 )

-// look in /proc to find the process start time so that we can verify
-// that this pid has started after ourself
+// State is the status of a process.
+type State rune
+
+const ( // Only values for Linux 3.14 and later are listed here
+	Dead        State = 'X'
+	DiskSleep   State = 'D'
+	Running     State = 'R'
+	Sleeping    State = 'S'
+	Stopped     State = 'T'
+	TracingStop State = 't'
+	Zombie      State = 'Z'
+)
+
+// String forms of the state from proc(5)'s documentation for
+// /proc/[pid]/status' "State" field.
+func (s State) String() string {
+	switch s {
+	case Dead:
+		return "dead"
+	case DiskSleep:
+		return "disk sleep"
+	case Running:
+		return "running"
+	case Sleeping:
+		return "sleeping"
+	case Stopped:
+		return "stopped"
+	case TracingStop:
+		return "tracing stop"
+	case Zombie:
+		return "zombie"
+	default:
+		return fmt.Sprintf("unknown (%c)", s)
+	}
+}
+
+// Stat_t represents the information from /proc/[pid]/stat, as
+// described in proc(5) with names based on the /proc/[pid]/status
+// fields.
+type Stat_t struct {
+	// PID is the process ID.
+	PID uint
+
+	// Name is the command run by the process.
+	Name string
+
+	// State is the state of the process.
+	State State
+
+	// StartTime is the number of clock ticks after system boot (since
+	// Linux 2.6).
+	StartTime uint64
+}
+
+// Stat returns a Stat_t instance for the specified process.
+func Stat(pid int) (stat Stat_t, err error) {
+	bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	if err != nil {
+		return stat, err
+	}
+	return parseStat(string(bytes))
+}
+
+// GetProcessStartTime is deprecated.  Use Stat(pid) and
+// Stat_t.StartTime instead.
 func GetProcessStartTime(pid int) (string, error) {
-	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	stat, err := Stat(pid)
 	if err != nil {
 		return "", err
 	}
-	return parseStartTime(string(data))
+	return fmt.Sprintf("%d", stat.StartTime), nil
 }

-func parseStartTime(stat string) (string, error) {
-	// the starttime is located at pos 22
-	// from the man page
-	//
-	// starttime %llu (was %lu before Linux 2.6)
-	// (22)  The  time the process started after system boot.  In kernels before Linux 2.6, this
-	// value was expressed in jiffies.  Since Linux 2.6, the value is expressed in  clock  ticks
-	// (divide by sysconf(_SC_CLK_TCK)).
-	//
-	// NOTE:
-	// pos 2 could contain space and is inside `(` and `)`:
-	// (2) comm  %s
-	// The filename of the executable, in parentheses.
-	// This is visible whether or not the executable is
-	// swapped out.
-	//
-	// the following is an example:
+func parseStat(data string) (stat Stat_t, err error) {
+	// From proc(5), field 2 could contain space and is inside `(` and `)`.
+	// The following is an example:
 	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+	i := strings.LastIndex(data, ")")
+	if i <= 2 || i >= len(data)-1 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}

-	// get parts after last `)`:
-	s := strings.Split(stat, ")")
-	parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
-	return parts[22-3], nil // starts at 3 (after the filename pos `2`)
+	parts := strings.SplitN(data[:i], "(", 2)
+	if len(parts) != 2 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}
+
+	stat.Name = parts[1]
+	_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
+	if err != nil {
+		return stat, err
+	}
+
+	// parts indexes should be offset by 3 from the field number given
+	// in proc(5), because parts is zero-indexed and we've removed fields
+	// one (PID) and two (Name) in the paren-split.
+	parts = strings.Split(data[i+2:], " ")
+	var state int
+	fmt.Sscanf(parts[3-3], "%c", &state)
+	stat.State = State(state)
+	fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
+	return stat, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
@ -1,40 +0,0 @@
-package system
-
-import (
-	"fmt"
-	"runtime"
-	"syscall"
-)
-
-// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
-//
-// We need different setns values for the different platforms and arch
-// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
-var setNsMap = map[string]uintptr{
-	"linux/386":     346,
-	"linux/arm64":   268,
-	"linux/amd64":   308,
-	"linux/arm":     375,
-	"linux/ppc":     350,
-	"linux/ppc64":   350,
-	"linux/ppc64le": 350,
-	"linux/s390x":   339,
-}
-
-var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-
-func SysSetns() uint32 {
-	return uint32(sysSetns)
-}
-
-func Setns(fd uintptr, flags uintptr) error {
-	ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-	if !exists {
-		return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
-	}
-	_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
-	if err != 0 {
-		return err
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go
@ -1,99 +1,35 @@
 package system

-import (
-	"syscall"
-	"unsafe"
-)
-
-var _zero uintptr
-
-// Returns the size of xattrs and nil error
-// Requires path, takes allocated []byte or nil as last argument
-func Llistxattr(path string, dest []byte) (size int, err error) {
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return -1, err
-	}
-	var newpathBytes unsafe.Pointer
-	if len(dest) > 0 {
-		newpathBytes = unsafe.Pointer(&dest[0])
-	} else {
-		newpathBytes = unsafe.Pointer(&_zero)
-	}
-
-	_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
-	size = int(_size)
-	if errno != 0 {
-		return -1, errno
-	}
-
-	return size, nil
-}
+import "golang.org/x/sys/unix"

 // Returns a []byte slice if the xattr is set and nil otherwise
 // Requires path and its attribute as arguments
 func Lgetxattr(path string, attr string) ([]byte, error) {
 	var sz int
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return nil, err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return nil, err
-	}
-
 	// Start with a 128 length byte array
-	sz = 128
-	dest := make([]byte, sz)
-	destBytes := unsafe.Pointer(&dest[0])
-	_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
+	dest := make([]byte, 128)
+	sz, errno := unix.Lgetxattr(path, attr, dest)

 	switch {
-	case errno == syscall.ENODATA:
+	case errno == unix.ENODATA:
 		return nil, errno
-	case errno == syscall.ENOTSUP:
+	case errno == unix.ENOTSUP:
 		return nil, errno
-	case errno == syscall.ERANGE:
+	case errno == unix.ERANGE:
 		// 128 byte array might just not be good enough,
-		// A dummy buffer is used ``uintptr(0)`` to get real size
+		// A dummy buffer is used to get the real size
 		// of the xattrs on disk
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
-		sz = int(_sz)
-		if sz < 0 {
+		sz, errno = unix.Lgetxattr(path, attr, []byte{})
+		if errno != nil {
 			return nil, errno
 		}
 		dest = make([]byte, sz)
-		destBytes := unsafe.Pointer(&dest[0])
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
-		if errno != 0 {
+		sz, errno = unix.Lgetxattr(path, attr, dest)
+		if errno != nil {
 			return nil, errno
 		}
-	case errno != 0:
+	case errno != nil:
 		return nil, errno
 	}
-	sz = int(_sz)
 	return dest[:sz], nil
 }
-
-func Lsetxattr(path string, attr string, data []byte, flags int) error {
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return err
-	}
-	var dataBytes unsafe.Pointer
-	if len(data) > 0 {
-		dataBytes = unsafe.Pointer(&data[0])
-	} else {
-		dataBytes = unsafe.Pointer(&_zero)
-	}
-	_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
-	if errno != 0 {
-		return errno
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
@ -2,7 +2,8 @@ package user

 import (
 	"errors"
-	"syscall"
+
+	"golang.org/x/sys/unix"
 )

 var (
@ -40,7 +41,7 @@ func lookupUser(filter func(u User) bool) (User, error) {
 // user cannot be found (or there is no /etc/passwd file on the filesystem),
 // then CurrentUser returns an error.
 func CurrentUser() (User, error) {
-	return LookupUid(syscall.Getuid())
+	return LookupUid(unix.Getuid())
 }

 // LookupUser looks up a user by their username in /etc/passwd. If the user
@ -88,7 +89,7 @@ func lookupGroup(filter func(g Group) bool) (Group, error) {
 // entry in /etc/passwd. If the group cannot be found (or there is no
 // /etc/group file on the filesystem), then CurrentGroup returns an error.
 func CurrentGroup() (Group, error) {
-	return LookupGid(syscall.Getgid())
+	return LookupGid(unix.Getgid())
 }

 // LookupGroup looks up a group by its name in /etc/group. If the group cannot
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
@ -199,18 +199,16 @@ type ExecUser struct {
 // files cannot be opened for any reason, the error is ignored and a nil
 // io.Reader is passed instead.
 func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
-	passwd, err := os.Open(passwdPath)
-	if err != nil {
-		passwd = nil
-	} else {
-		defer passwd.Close()
+	var passwd, group io.Reader
+
+	if passwdFile, err := os.Open(passwdPath); err == nil {
+		passwd = passwdFile
+		defer passwdFile.Close()
 	}

-	group, err := os.Open(groupPath)
-	if err != nil {
-		group = nil
-	} else {
-		defer group.Close()
+	if groupFile, err := os.Open(groupPath); err == nil {
+		group = groupFile
+		defer groupFile.Close()
 	}

 	return GetExecUser(userSpec, defaults, passwd, group)
@ -360,8 +358,8 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (

 				// Okay, so it's numeric. We can just roll with this.
 			}
-		} else if len(groups) > 0 {
-			// Supplementary group ids only make sense if in the implicit form.
+		} else if len(groups) > 0 && uidErr != nil {
+			// Supplementary group ids only make sense if in the implicit form for non-numeric users.
 			user.Sgids = make([]int, len(groups))
 			for i, group := range groups {
 				user.Sgids[i] = group.Gid
@ -433,9 +431,11 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
 // that opens the groupPath given and gives it as an argument to
 // GetAdditionalGroups.
 func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
-	group, err := os.Open(groupPath)
-	if err == nil {
-		defer group.Close()
+	var group io.Reader
+
+	if groupFile, err := os.Open(groupPath); err == nil {
+		group = groupFile
+		defer groupFile.Close()
 	}
 	return GetAdditionalGroups(additionalGroups, group)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
@ -1,148 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "cmsg.h"
-
-#define error(fmt, ...)							\
-	({								\
-		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
-		errno = ECOMM;						\
-		goto err; /* return value */				\
-	})
-
-/*
- * Sends a file descriptor along the sockfd provided. Returns the return
- * value of sendmsg(2). Any synchronisation and preparation of state
- * should be done external to this (we expect the other side to be in
- * recvfd() in the code).
- */
-ssize_t sendfd(int sockfd, struct file_t file)
-{
-	struct msghdr msg = {0};
-	struct iovec iov[1] = {0};
-	struct cmsghdr *cmsg;
-	int *fdptr;
-	int ret;
-
-	union {
-		char buf[CMSG_SPACE(sizeof(file.fd))];
-		struct cmsghdr align;
-	} u;
-
-	/*
-	 * We need to send some other data along with the ancillary data,
-	 * otherwise the other side won't recieve any data. This is very
-	 * well-hidden in the documentation (and only applies to
-	 * SOCK_STREAM). See the bottom part of unix(7).
-	 */
-	iov[0].iov_base = file.name;
-	iov[0].iov_len = strlen(file.name) + 1;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = u.buf;
-	msg.msg_controllen = sizeof(u.buf);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
-
-	fdptr = (int *) CMSG_DATA(cmsg);
-	memcpy(fdptr, &file.fd, sizeof(int));
-
-	return sendmsg(sockfd, &msg, 0);
-}
-
-/*
- * Receives a file descriptor from the sockfd provided. Returns the file
- * descriptor as sent from sendfd(). It will return the file descriptor
- * or die (literally) trying. Any synchronisation and preparation of
- * state should be done external to this (we expect the other side to be
- * in sendfd() in the code).
- */
-struct file_t recvfd(int sockfd)
-{
-	struct msghdr msg = {0};
-	struct iovec iov[1] = {0};
-	struct cmsghdr *cmsg;
-	struct file_t file = {0};
-	int *fdptr;
-	int olderrno;
-
-	union {
-		char buf[CMSG_SPACE(sizeof(file.fd))];
-		struct cmsghdr align;
-	} u;
-
-	/* Allocate a buffer. */
-	/* TODO: Make this dynamic with MSG_PEEK. */
-	file.name = malloc(TAG_BUFFER);
-	if (!file.name)
-		error("recvfd: failed to allocate file.tag buffer\n");
-
-	/*
-	 * We need to "recieve" the non-ancillary data even though we don't
-	 * plan to use it at all. Otherwise, things won't work as expected.
-	 * See unix(7) and other well-hidden documentation.
-	 */
-	iov[0].iov_base = file.name;
-	iov[0].iov_len = TAG_BUFFER;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = u.buf;
-	msg.msg_controllen = sizeof(u.buf);
-
-	ssize_t ret = recvmsg(sockfd, &msg, 0);
-	if (ret < 0)
-		goto err;
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	if (!cmsg)
-		error("recvfd: got NULL from CMSG_FIRSTHDR");
-	if (cmsg->cmsg_level != SOL_SOCKET)
-		error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
-	if (cmsg->cmsg_type != SCM_RIGHTS)
-		error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
-	if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
-		error("recvfd: expected correct CMSG_LEN in cmsg: %lu", (unsigned long)cmsg->cmsg_len);
-
-	fdptr = (int *) CMSG_DATA(cmsg);
-	if (!fdptr || *fdptr < 0)
-		error("recvfd: recieved invalid pointer");
-
-	file.fd = *fdptr;
-	return file;
-
-err:
-	olderrno = errno;
-	free(file.name);
-	errno = olderrno;
-	return (struct file_t){0};
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@ -3,7 +3,7 @@
 package utils

 /*
- * Copyright 2016 SUSE LLC
+ * Copyright 2016, 2017 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -18,28 +18,66 @@ package utils
 * limitations under the License.
 */

-/*
-#include <errno.h>
-#include <stdlib.h>
-#include "cmsg.h"
-*/
-import "C"
-
 import (
+	"fmt"
 	"os"
-	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

+// MaxNameLen is the maximum length of the name of a file descriptor being
+// sent using SendFd. The name of the file handle returned by RecvFd will never
+// be larger than this value.
+const MaxNameLen = 4096
+
+// oobSpace is the size of the oob slice required to store a single FD. Note
+// that unix.UnixRights appears to make the assumption that fd is always int32,
+// so sizeof(fd) = 4.
+var oobSpace = unix.CmsgSpace(4)
+
 // RecvFd waits for a file descriptor to be sent over the given AF_UNIX
 // socket. The file name of the remote file descriptor will be recreated
 // locally (it is sent as non-auxiliary data in the same payload).
 func RecvFd(socket *os.File) (*os.File, error) {
-	file, err := C.recvfd(C.int(socket.Fd()))
+	// For some reason, unix.Recvmsg uses the length rather than the capacity
+	// when passing the msg_controllen and other attributes to recvmsg.  So we
+	// have to actually set the length.
+	name := make([]byte, MaxNameLen)
+	oob := make([]byte, oobSpace)
+
+	sockfd := socket.Fd()
+	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
 	if err != nil {
 		return nil, err
 	}
-	defer C.free(unsafe.Pointer(file.name))
-	return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
+
+	if n >= MaxNameLen || oobn != oobSpace {
+		return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
+	}
+
+	// Truncate.
+	name = name[:n]
+	oob = oob[:oobn]
+
+	scms, err := unix.ParseSocketControlMessage(oob)
+	if err != nil {
+		return nil, err
+	}
+	if len(scms) != 1 {
+		return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
+	}
+	scm := scms[0]
+
+	fds, err := unix.ParseUnixRights(&scm)
+	if err != nil {
+		return nil, err
+	}
+	if len(fds) != 1 {
+		return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
+	}
+	fd := uintptr(fds[0])
+
+	return os.NewFile(fd, string(name)), nil
 }

 // SendFd sends a file descriptor over the given AF_UNIX socket. In
@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) {
 // non-auxiliary data in the same payload (allowing to send contextual
 // information for a file descriptor).
 func SendFd(socket, file *os.File) error {
-	var cfile C.struct_file_t
-	cfile.fd = C.int(file.Fd())
-	cfile.name = C.CString(file.Name())
-	defer C.free(unsafe.Pointer(cfile.name))
+	name := []byte(file.Name())
+	if len(name) >= MaxNameLen {
+		return fmt.Errorf("sendfd: filename too long: %s", file.Name())
+	}
+	oob := unix.UnixRights(int(file.Fd()))

-	_, err := C.sendfd(C.int(socket.Fd()), cfile)
-	return err
+	return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
@ -1,36 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#if !defined(CMSG_H)
-#define CMSG_H
-
-#include <sys/types.h>
-
-/* TODO: Implement this properly with MSG_PEEK. */
-#define TAG_BUFFER 4096
-
-/* This mirrors Go's (*os.File). */
-struct file_t {
-	char *name;
-	int fd;
-};
-
-struct file_t recvfd(int sockfd);
-ssize_t sendfd(int sockfd, struct file_t file);
-
-#endif /* !defined(CMSG_H) */
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@ -8,8 +8,9 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
-	"syscall"
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

 const (
@ -41,7 +42,7 @@ func ResolveRootfs(uncleanRootfs string) (string, error) {

 // ExitStatus returns the correct exit status for a process based on if it
 // was signaled or exited cleanly
-func ExitStatus(status syscall.WaitStatus) int {
+func ExitStatus(status unix.WaitStatus) int {
 	if status.Signaled() {
 		return exitSignalOffset + int(status.Signal())
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
@ -4,8 +4,10 @@ package utils

 import (
 	"io/ioutil"
+	"os"
 	"strconv"
-	"syscall"
+
+	"golang.org/x/sys/unix"
 )

 func CloseExecFrom(minFd int) error {
@ -25,9 +27,18 @@ func CloseExecFrom(minFd int) error {
 			continue
 		}

-		// intentionally ignore errors from syscall.CloseOnExec
-		syscall.CloseOnExec(fd)
+		// intentionally ignore errors from unix.CloseOnExec
+		unix.CloseOnExec(fd)
 		// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
 	}
 	return nil
 }
+
+// NewSockPair returns a connected pair of close-on-exec unix stream sockets, wrapped as files named name+"-p" (parent) and name+"-c" (child).
+func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
+	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
+}
--- a/vendor/github.com/opencontainers/runc/vendor.conf
+++ b/vendor/github.com/opencontainers/runc/vendor.conf
@ -0,0 +1,21 @@
+# OCI runtime-spec. When updating this, make sure you use a version tag rather
+# than a commit ID so it's much more obvious what version of the spec we are
+# using.
+github.com/opencontainers/runtime-spec v1.0.0
+# Core libcontainer functionality.
+github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
+github.com/opencontainers/selinux v1.0.0-rc1
+github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
+github.com/Sirupsen/logrus 26709e2714106fb8ad40b773b711ebce25b78914
+github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
+github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270
+# systemd integration.
+github.com/coreos/go-systemd v14
+github.com/coreos/pkg v3
+github.com/godbus/dbus v3
+github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8
+# Command-line interface.
+github.com/docker/docker 0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d
+github.com/docker/go-units v0.2.0
+github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e
+golang.org/x/sys 0e0164865330d5cf1c00247be08330bf96e2f87c https://github.com/golang/sys