Bump up runtime-spec dependency to v1.0.0

Signed-off-by: Mrunal Patel <mpatel@redhat.com>
2017-07-19 21:07:01 -07:00 · 2017-07-19 21:07:01 -07:00 · 4128bbd7dc
commit 4128bbd7dc
parent 0eb5cd527f
83 changed files with 1020 additions and 14970 deletions
--- a/vendor/github.com/opencontainers/runc/libcontainer/README.md
+++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md
@ -56,25 +56,91 @@ Once you have an instance of the factory created we can create a configuration
 struct describing how the container is to be created. A sample would look similar to this:

 ```go
-defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
+defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
 config := &configs.Config{
 	Rootfs: "/your/path/to/rootfs",
-	Capabilities: []string{
-		"CAP_CHOWN",
-		"CAP_DAC_OVERRIDE",
-		"CAP_FSETID",
-		"CAP_FOWNER",
-		"CAP_MKNOD",
-		"CAP_NET_RAW",
-		"CAP_SETGID",
-		"CAP_SETUID",
-		"CAP_SETFCAP",
-		"CAP_SETPCAP",
-		"CAP_NET_BIND_SERVICE",
-		"CAP_SYS_CHROOT",
-		"CAP_KILL",
-		"CAP_AUDIT_WRITE",
-	},
+	Capabilities: &configs.Capabilities{
+                Bounding: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Effective: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Inheritable: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Permitted: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+                Ambient: []string{
+                        "CAP_CHOWN",
+                        "CAP_DAC_OVERRIDE",
+                        "CAP_FSETID",
+                        "CAP_FOWNER",
+                        "CAP_MKNOD",
+                        "CAP_NET_RAW",
+                        "CAP_SETGID",
+                        "CAP_SETUID",
+                        "CAP_SETFCAP",
+                        "CAP_SETPCAP",
+                        "CAP_NET_BIND_SERVICE",
+                        "CAP_SYS_CHROOT",
+                        "CAP_KILL",
+                        "CAP_AUDIT_WRITE",
+                },
+        },
 	Namespaces: configs.Namespaces([]configs.Namespace{
 		{Type: configs.NEWNS},
 		{Type: configs.NEWUTS},
@ -112,14 +178,14 @@ config := &configs.Config{
 			Source:      "tmpfs",
 			Destination: "/dev",
 			Device:      "tmpfs",
-			Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
+			Flags:       unix.MS_NOSUID | unix.MS_STRICTATIME,
 			Data:        "mode=755",
 		},
 		{
 			Source:      "devpts",
 			Destination: "/dev/pts",
 			Device:      "devpts",
-			Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
+			Flags:       unix.MS_NOSUID | unix.MS_NOEXEC,
 			Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
 		},
 		{
@ -139,7 +205,7 @@ config := &configs.Config{
 			Source:      "sysfs",
 			Destination: "/sys",
 			Device:      "sysfs",
-			Flags:       defaultMountFlags | syscall.MS_RDONLY,
+			Flags:       defaultMountFlags | unix.MS_RDONLY,
 		},
 	},
 	UidMappings: []configs.IDMap{
@ -165,7 +231,7 @@ config := &configs.Config{
 	},
 	Rlimits: []configs.Rlimit{
 		{
-			Type: syscall.RLIMIT_NOFILE,
+			Type: unix.RLIMIT_NOFILE,
 			Hard: uint64(1025),
 			Soft: uint64(1025),
 		},
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
 	}, nil
 }

-func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
-	// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
-	// process could in container and shared pid namespace with host, and
-	// /proc/1/cgroup could point to whole other world of cgroups.
-	initPath, err := cgroups.GetThisCgroupDir(subsystem)
-	if err != nil {
-		return "", err
-	}
-	// This is needed for nested containers, because in /proc/self/cgroup we
-	// see pathes from host, which don't exist in container.
-	relDir, err := filepath.Rel(root, initPath)
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(mountpoint, relDir), nil
-}
-
 func (raw *cgroupData) path(subsystem string) (string, error) {
-	mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
+	mnt, err := cgroups.FindCgroupMountpoint(subsystem)
 	// If we didn't mount the subsystem, there is no point we make the path.
 	if err != nil {
 		return "", err
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
 		return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
 	}

-	parentPath, err := raw.parentPath(subsystem, mnt, root)
+	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
+	// process could be in a container that shares the pid namespace with the
+	// host, and /proc/1/cgroup could point to a whole other world of cgroups.
+	parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
 	if err != nil {
 		return "", err
 	}
@ -346,8 +332,8 @@ func removePath(p string, err error) error {
 	return nil
 }

-func CheckCpushares(path string, c int64) error {
-	var cpuShares int64
+func CheckCpushares(path string, c uint64) error {
+	var cpuShares uint64

 	if c == 0 {
 		return nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@ -55,7 +55,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error

 func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
 	if cgroup.Resources.CpuRtPeriod != 0 {
-		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
+		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
 			return err
 		}
 	}
@ -69,12 +69,12 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {

 func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
 	if cgroup.Resources.CpuShares != 0 {
-		if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
+		if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
 			return err
 		}
 	}
 	if cgroup.Resources.CpuPeriod != 0 {
-		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
+		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
 			return err
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@ -57,10 +57,11 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
 	if dir == "" {
 		return nil
 	}
-	root, err := getCgroupRoot()
+	mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
 	if err != nil {
 		return err
 	}
+	root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
 	// 'ensureParent' start with parent because we don't want to
 	// explicitly inherit from parent, it could conflict with
 	// 'cpuset.cpu_exclusive'.
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@ -10,13 +10,19 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
-	"syscall"
+	"syscall" // only for Errno

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
 )

-const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+const (
+	cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+	cgroupMemorySwapLimit   = "memory.memsw.limit_in_bytes"
+	cgroupMemoryLimit       = "memory.limit_in_bytes"
+)

 type MemoryGroup struct {
 }
@ -29,14 +35,18 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	path, err := d.path("memory")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
+	} else if path == "" {
+		return nil
 	}
 	if memoryAssigned(d.config) {
-		if path != "" {
+		if _, err := os.Stat(path); os.IsNotExist(err) {
 			if err := os.MkdirAll(path, 0755); err != nil {
 				return err
 			}
-		}
-		if d.config.KernelMemory != 0 {
+			// Only enable kernel memory accounting when this cgroup
+			// is created by libcontainer, otherwise we might get an
+			// error when people use `cgroupsPath` to join an existing
+			// cgroup whose kernel memory is not initialized.
 			if err := EnableKernelMemoryAccounting(path); err != nil {
 				return err
 			}
@ -85,7 +95,7 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
 		// once tasks have been attached to the cgroup
 		if pathErr, ok := err.(*os.PathError); ok {
 			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
-				if errNo == syscall.EBUSY {
+				if errNo == unix.EBUSY {
 					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
 				}
 			}
@ -96,9 +106,18 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
 }

 func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
+	// If the memory update is set to -1 we should also
+	// set swap to -1, it means unlimited memory.
+	if cgroup.Resources.Memory == -1 {
+		// Only set swap if it's enabled in kernel
+		if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
+			cgroup.Resources.MemorySwap = -1
+		}
+	}
+
 	// When memory and swap memory are both set, we need to handle the cases
 	// for updating container.
-	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
+	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
 		memoryUsage, err := getMemoryData(path, "")
 		if err != nil {
 			return err
@ -107,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
 		// When update memory limit, we should adapt the write sequence
 		// for memory and swap memory, so it won't fail because the new
 		// value and the old value don't fit kernel's validation.
-		if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+		if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 		} else {
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 		}
 	} else {
 		if cgroup.Resources.Memory != 0 {
-			if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 		}
-		if cgroup.Resources.MemorySwap > 0 {
-			if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+		if cgroup.Resources.MemorySwap != 0 {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 		}
@ -167,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 	}
 	if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
 		return nil
-	} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
-		if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
+	} else if *cgroup.Resources.MemorySwappiness <= 100 {
+		if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
 			return err
 		}
 	} else {
-		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
+		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
 	}

 	return nil
@ -224,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 	}
 	stats.MemoryStats.KernelTCPUsage = kernelTCPUsage

+	useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
+	value, err := getCgroupParamUint(path, useHierarchy)
+	if err != nil {
+		return err
+	}
+	if value == 1 {
+		stats.MemoryStats.UseHierarchy = true
+	}
 	return nil
 }

@ -234,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
 		cgroup.Resources.KernelMemory > 0 ||
 		cgroup.Resources.KernelMemoryTCP > 0 ||
 		cgroup.Resources.OomKillDisable ||
-		(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
+		(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
 }

 func getMemoryData(path, name string) (cgroups.MemoryData, error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@ -51,6 +51,8 @@ type MemoryStats struct {
 	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
 	// usage of kernel TCP memory
 	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
+	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
+	UseHierarchy bool `json:"use_hierarchy"`

 	Stats map[string]uint64 `json:"stats,omitempty"`
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@ -5,7 +5,6 @@ package systemd
 import (
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
@ -261,12 +260,19 @@ func (m *Manager) Apply(pid int) error {

 	if c.Resources.Memory != 0 {
 		properties = append(properties,
-			newProp("MemoryLimit", uint64(c.Resources.Memory)))
+			newProp("MemoryLimit", c.Resources.Memory))
 	}

 	if c.Resources.CpuShares != 0 {
 		properties = append(properties,
-			newProp("CPUShares", uint64(c.Resources.CpuShares)))
+			newProp("CPUShares", c.Resources.CpuShares))
+	}
+
+	// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+	if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
+		cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+		properties = append(properties,
+			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
 	}

 	if c.Resources.BlkioWeight != 0 {
@ -327,15 +333,6 @@ func (m *Manager) GetPaths() map[string]string {
 	return paths
 }

-func writeFile(dir, file, data string) error {
-	// Normally dir should not be empty, one case is that cgroup subsystem
-	// is not mounted, we will get empty dir, and we want it fail here.
-	if dir == "" {
-		return fmt.Errorf("no such directory for %s", file)
-	}
-	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
-}
-
 func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
 	path, err := getSubsystemPath(c, subsystem)
 	if err != nil {
@ -429,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
 		return "", err
 	}

-	initPath, err := cgroups.GetInitCgroupDir(subsystem)
+	initPath, err := cgroups.GetInitCgroup(subsystem)
 	if err != nil {
 		return "", err
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@ -66,6 +66,21 @@ func isSubsystemAvailable(subsystem string) bool {
 	return avail
 }

+func GetClosestMountpointAncestor(dir, mountinfo string) string {
+	deepestMountPoint := ""
+	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
+		mountInfoParts := strings.Fields(mountInfoEntry)
+		if len(mountInfoParts) < 5 {
+			continue
+		}
+		mountPoint := mountInfoParts[4]
+		if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
+			deepestMountPoint = mountPoint
+		}
+	}
+	return deepestMountPoint
+}
+
 func FindCgroupMountpointDir() (string, error) {
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
@ -109,7 +124,7 @@ type Mount struct {
 	Subsystems []string
 }

-func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
 	if len(m.Subsystems) == 0 {
 		return "", fmt.Errorf("no subsystem for mount")
 	}
@ -203,8 +218,8 @@ func GetAllSubsystems() ([]string, error) {
 	return subsystems, nil
 }

-// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
-func GetThisCgroupDir(subsystem string) (string, error) {
+// GetOwnCgroup returns the relative path to the cgroup docker is running in.
+func GetOwnCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return "", err
@ -213,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

-func GetInitCgroupDir(subsystem string) (string, error) {
+func GetOwnCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetOwnCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}

+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func GetInitCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
 	if err != nil {
 		return "", err
@ -223,6 +246,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

+func GetInitCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetInitCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
+	mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	// This is needed for nested containers, because in /proc/self/cgroup we
+	// see paths from the host, which don't exist in the container.
+	relCgroup, err := filepath.Rel(root, cgroup)
+	if err != nil {
+		return "", err
+	}
+
+	return filepath.Join(mnt, relCgroup), nil
+}
+
 func readProcsFile(dir string) ([]int, error) {
 	f, err := os.Open(filepath.Join(dir, CgroupProcesses))
 	if err != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package configs

 type FreezerState string
@ -60,19 +58,19 @@ type Resources struct {
 	KernelMemoryTCP int64 `json:"kernel_memory_tcp"`

 	// CPU shares (relative weight vs. other containers)
-	CpuShares int64 `json:"cpu_shares"`
+	CpuShares uint64 `json:"cpu_shares"`

 	// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
 	CpuQuota int64 `json:"cpu_quota"`

 	// CPU period to be used for hardcapping (in usecs). 0 to use system default.
-	CpuPeriod int64 `json:"cpu_period"`
+	CpuPeriod uint64 `json:"cpu_period"`

 	// How many time CPU will use in realtime scheduling (in usecs).
 	CpuRtRuntime int64 `json:"cpu_rt_quota"`

 	// CPU period to be used for realtime scheduling (in usecs).
-	CpuRtPeriod int64 `json:"cpu_rt_period"`
+	CpuRtPeriod uint64 `json:"cpu_rt_period"`

 	// CPU to use
 	CpusetCpus string `json:"cpuset_cpus"`
@ -114,7 +112,7 @@ type Resources struct {
 	OomKillDisable bool `json:"oom_kill_disable"`

 	// Tuning swappiness behaviour per cgroup
-	MemorySwappiness *int64 `json:"memory_swappiness"`
+	MemorySwappiness *uint64 `json:"memory_swappiness"`

 	// Set priority of network traffic for container
 	NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@ -113,8 +113,8 @@ type Config struct {
 	Namespaces Namespaces `json:"namespaces"`

 	// Capabilities specify the capabilities to keep when executing the process inside the container
-	// All capbilities not specified will be dropped from the processes capability mask
-	Capabilities []string `json:"capabilities"`
+	// All capabilities not specified will be dropped from the processes capability mask
+	Capabilities *Capabilities `json:"capabilities"`

 	// Networks specifies the container's network setup to be created
 	Networks []*Network `json:"networks"`
@ -183,6 +183,9 @@ type Config struct {
 	// NoNewKeyring will not allocated a new session keyring for the container.  It will use the
 	// callers keyring in this case.
 	NoNewKeyring bool `json:"no_new_keyring"`
+
+	// Rootless specifies whether the container is a rootless container.
+	Rootless bool `json:"rootless"`
 }

 type Hooks struct {
@ -197,6 +200,19 @@ type Hooks struct {
 	Poststop []Hook
 }

+type Capabilities struct {
+	// Bounding is the set that limits the capabilities a process can gain during execve.
+	Bounding []string
+	// Effective is the set of capabilities checked by the kernel.
+	Effective []string
+	// Inheritable is the capabilities preserved across execve.
+	Inheritable []string
+	// Permitted is the limiting superset for effective capabilities.
+	Permitted []string
+	// Ambient is the ambient set of capabilities that are kept.
+	Ambient []string
+}
+
 func (hooks *Hooks) UnmarshalJSON(b []byte) error {
 	var state struct {
 		Prestart  []CommandHook
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@ -0,0 +1,61 @@
+package configs
+
+import "fmt"
+
+// HostUID gets the translated uid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostUID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.UidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.UidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootUID gets the root uid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootUID() (int, error) {
+	return c.HostUID(0)
+}
+
+// HostGID gets the translated gid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostGID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.GidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.GidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootGID gets the root gid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootGID() (int, error) {
+	return c.HostGID(0)
+}
+
+// Utility function that gets a host ID for a container ID from user namespace map
+// if that ID is present in the map.
+func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
+	for _, m := range uMap {
+		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
+			hostID := m.HostID + (containerID - m.ContainerID)
+			return hostID, true
+		}
+	}
+	return -1, false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
@ -1,51 +0,0 @@
-// +build freebsd linux
-
-package configs
-
-import "fmt"
-
-// HostUID gets the root uid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostUID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.UidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.UidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root uid 0
-	return 0, nil
-}
-
-// HostGID gets the root gid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostGID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.GidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.GidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root gid 0
-	return 0, nil
-}
-
-// Utility function that gets a host ID for a container ID from user namespace map
-// if that ID is present in the map.
-func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
-	for _, m := range uMap {
-		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
-			hostID := m.HostID + (containerID - m.ContainerID)
-			return hostID, true
-		}
-	}
-	return -1, false
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package configs

 import (
@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {

 func NamespaceTypes() []NamespaceType {
 	return []NamespaceType{
+		NEWUSER, // Keep user NS always first, don't move it.
+		NEWIPC,
+		NEWUTS,
 		NEWNET,
 		NEWPID,
 		NEWNS,
-		NEWUTS,
-		NEWIPC,
-		NEWUSER,
 	}
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@ -2,19 +2,19 @@

 package configs

-import "syscall"
+import "golang.org/x/sys/unix"

 func (n *Namespace) Syscall() int {
 	return namespaceInfo[n.Type]
 }

 var namespaceInfo = map[NamespaceType]int{
-	NEWNET:  syscall.CLONE_NEWNET,
-	NEWNS:   syscall.CLONE_NEWNS,
-	NEWUSER: syscall.CLONE_NEWUSER,
-	NEWIPC:  syscall.CLONE_NEWIPC,
-	NEWUTS:  syscall.CLONE_NEWUTS,
-	NEWPID:  syscall.CLONE_NEWPID,
+	NEWNET:  unix.CLONE_NEWNET,
+	NEWNS:   unix.CLONE_NEWNS,
+	NEWUSER: unix.CLONE_NEWUSER,
+	NEWIPC:  unix.CLONE_NEWIPC,
+	NEWUTS:  unix.CLONE_NEWUTS,
+	NEWPID:  unix.CLONE_NEWPID,
 }

 // CloneFlags parses the container's Namespaces options to set the correct
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
@ -1,4 +1,4 @@
-// +build !linux,!freebsd
+// +build !linux

 package configs

--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go
@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package devices

 import (
@ -8,9 +6,11 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	"syscall"
+	"syscall" //only for Stat_t

 	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
 )

 var (
@ -38,10 +38,10 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
 	case mode&os.ModeDevice == 0:
 		return nil, ErrNotADevice
 	case mode&os.ModeCharDevice != 0:
-		fileModePermissionBits |= syscall.S_IFCHR
+		fileModePermissionBits |= unix.S_IFCHR
 		devType = 'c'
 	default:
-		fileModePermissionBits |= syscall.S_IFBLK
+		fileModePermissionBits |= unix.S_IFBLK
 		devType = 'b'
 	}
 	stat_t, ok := fileInfo.Sys().(*syscall.Stat_t)
@ -75,7 +75,8 @@ func getDevices(path string) ([]*configs.Device, error) {
 		switch {
 		case f.IsDir():
 			switch f.Name() {
-			case "pts", "shm", "fd", "mqueue":
+			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
+			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
 				continue
 			default:
 				sub, err := getDevices(filepath.Join(path, f.Name()))
--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go
@ -1,3 +1,3 @@
-// +build windows
+// +build !linux

 package devices
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
@ -33,7 +33,8 @@ enum sync_t {
 	SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
 	SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
 	SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
-	SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
+	SYNC_GRANDCHILD  = 0x44, /* The grandchild is ready to run. */
+	SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */

 	/* XXX: This doesn't help with segfaults and other such issues. */
 	SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
@ -71,18 +72,23 @@ struct nlconfig_t {
 	char *namespaces;
 	size_t namespaces_len;
 	uint8_t is_setgroup;
+	uint8_t is_rootless;
+	char *oom_score_adj;
+	size_t oom_score_adj_len;
 };

 /*
 * List of netlink message types sent to us as part of bootstrapping the init.
 * These constants are defined in libcontainer/message_linux.go.
 */
-#define INIT_MSG		62000
+#define INIT_MSG			62000
 #define CLONE_FLAGS_ATTR	27281
 #define NS_PATHS_ATTR		27282
-#define UIDMAP_ATTR		27283
-#define GIDMAP_ATTR		27284
+#define UIDMAP_ATTR			27283
+#define GIDMAP_ATTR			27284
 #define SETGROUP_ATTR		27285
+#define OOM_SCORE_ADJ_ATTR	27286
+#define ROOTLESS_ATTR	    27287

 /*
 * Use the raw syscall for versions of glibc which don't include a function for
@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
 			policy = "deny";
 			break;
 		case SETGROUPS_DEFAULT:
+		default:
 			/* Nothing to do. */
 			return;
 	}
@ -185,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
 	}
 }

-static void update_uidmap(int pid, char *map, int map_len)
+static void update_uidmap(int pid, char *map, size_t map_len)
 {
 	if (map == NULL || map_len <= 0)
 		return;
@ -194,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
 		bail("failed to update /proc/%d/uid_map", pid);
 }

-static void update_gidmap(int pid, char *map, int map_len)
+static void update_gidmap(int pid, char *map, size_t map_len)
 {
 	if (map == NULL || map_len <= 0)
 		return;
@ -203,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
 		bail("failed to update /proc/%d/gid_map", pid);
 }

+static void update_oom_score_adj(char *data, size_t len)
+{
+	if (data == NULL || len <= 0)
+		return;
+
+	if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
+		bail("failed to update /proc/self/oom_score_adj");
+}
+
 /* A dummy function that just jumps to the given jumpval. */
 static int child_func(void *arg) __attribute__ ((noinline));
 static int child_func(void *arg)
@ -284,7 +300,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 	/* Retrieve the netlink header. */
 	len = read(fd, &hdr, NLMSG_HDRLEN);
 	if (len != NLMSG_HDRLEN)
-		bail("invalid netlink header length %lu", len);
+		bail("invalid netlink header length %zu", len);

 	if (hdr.nlmsg_type == NLMSG_ERROR)
 		bail("failed to read netlink message");
@ -300,7 +316,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)

 	len = read(fd, data, size);
 	if (len != size)
-		bail("failed to read netlink payload, %lu != %lu", len, size);
+		bail("failed to read netlink payload, %zu != %zu", len, size);

 	/* Parse the netlink payload. */
 	config->data = data;
@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
 		case CLONE_FLAGS_ATTR:
 			config->cloneflags = readint32(current);
 			break;
+		case ROOTLESS_ATTR:
+			config->is_rootless = readint8(current);
+			break;
+		case OOM_SCORE_ADJ_ATTR:
+			config->oom_score_adj = current;
+			config->oom_score_adj_len = payload_len;
+			break;
 		case NS_PATHS_ATTR:
 			config->namespaces = current;
 			config->namespaces_len = payload_len;
@ -413,7 +436,7 @@ void nsexec(void)
 {
 	int pipenum;
 	jmp_buf env;
-	int syncpipe[2];
+	int sync_child_pipe[2], sync_grandchild_pipe[2];
 	struct nlconfig_t config = {0};

 	/*
@ -424,18 +447,43 @@ void nsexec(void)
 	if (pipenum == -1)
 		return;

-	/* make the process non-dumpable */
-	if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
-		bail("failed to set process as non-dumpable");
-	}
-
 	/* Parse all of the netlink configuration. */
 	nl_parse(pipenum, &config);

+	/* Set oom_score_adj. This has to be done before !dumpable because
+	 * /proc/self/oom_score_adj is not writeable unless you're a privileged
+	 * user (if !dumpable is set). All children inherit their parent's
+	 * oom_score_adj value on fork(2) so this will always be propagated
+	 * properly.
+	 */
+	update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
+
+	/*
+	 * Make the process non-dumpable, to avoid various race conditions that
+	 * could cause processes in namespaces we're joining to access host
+	 * resources (or potentially execute code).
+	 *
+	 * However, if the number of namespaces we are joining is 0, we are not
+	 * going to be switching to a different security context. Thus setting
+	 * ourselves to be non-dumpable only breaks things (like rootless
+	 * containers), which is the recommendation from the kernel folks.
+	 */
+	if (config.namespaces) {
+		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+			bail("failed to set process as non-dumpable");
+	}
+
 	/* Pipe so we can tell the child when we've finished setting up. */
-	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
 		bail("failed to setup sync pipe between parent and child");

+	/*
+	 * We need a new socketpair to sync with the grandchild so we don't have
+	 * a race condition with the child.
+	 */
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
+		bail("failed to setup sync pipe between parent and grandchild");
+
 	/* TODO: Currently we aren't dealing with child deaths properly. */

 	/*
@ -494,9 +542,10 @@ void nsexec(void)
 	 *          process.
 	 */
 	case JUMP_PARENT: {
-			int len, ready = 0;
+			int len;
 			pid_t child;
 			char buf[JSON_MAX];
+			bool ready = false;

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
@ -513,30 +562,39 @@ void nsexec(void)
 			 * ready, so we can receive all possible error codes
 			 * generated by children.
 			 */
-			while (ready < 2) {
+			while (!ready) {
 				enum sync_t s;
+				int ret;

-				/* This doesn't need to be global, we're in the parent. */
-				int syncfd = syncpipe[1];
+				syncfd = sync_child_pipe[1];
+				close(sync_child_pipe[0]);

 				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with child: next state");

 				switch (s) {
-				case SYNC_ERR: {
-						/* We have to mirror the error code of the child. */
-						int ret;
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");

-						if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
-							bail("failed to sync with child: read(error code)");
-
-						exit(ret);
-					}
-					break;
+					exit(ret);
 				case SYNC_USERMAP_PLS:
-					/* Enable setgroups(2) if we've been asked to. */
+					/*
+					 * Enable setgroups(2) if we've been asked to. But we also
+					 * have to explicitly disable setgroups(2) if we're
+					 * creating a rootless container (this is required since
+					 * Linux 3.19).
+					 */
+					if (config.is_rootless && config.is_setgroup) {
+						kill(child, SIGKILL);
+						bail("cannot allow setgroup in an unprivileged user namespace setup");
+					}
+
 					if (config.is_setgroup)
 						update_setgroups(child, SETGROUPS_ALLOW);
+					if (config.is_rootless)
+						update_setgroups(child, SETGROUPS_DENY);

 					/* Set up mappings. */
 					update_uidmap(child, config.uidmap, config.uidmap_len);
@ -548,11 +606,6 @@ void nsexec(void)
 						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
 					}
 					break;
-				case SYNC_USERMAP_ACK:
-					/* We should _never_ receive acks. */
-					kill(child, SIGKILL);
-					bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
-					break;
 				case SYNC_RECVPID_PLS: {
 						pid_t old = child;

@ -570,20 +623,46 @@ void nsexec(void)
 							bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
 						}
 					}
-
-					ready++;
-					break;
-				case SYNC_RECVPID_ACK:
-					/* We should _never_ receive acks. */
-					kill(child, SIGKILL);
-					bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
 					break;
 				case SYNC_CHILD_READY:
-					ready++;
+					ready = true;
 					break;
 				default:
-					bail("unexpected sync value");
+					bail("unexpected sync value: %u", s);
+				}
+			}
+
+			/* Now sync with grandchild. */
+
+			ready = false;
+			while (!ready) {
+				enum sync_t s;
+				int ret;
+
+				syncfd = sync_grandchild_pipe[1];
+				close(sync_grandchild_pipe[0]);
+
+				s = SYNC_GRANDCHILD;
+				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+					kill(child, SIGKILL);
+					bail("failed to sync with child: write(SYNC_GRANDCHILD)");
+				}
+
+				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with child: next state");
+
+				switch (s) {
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");
+
+					exit(ret);
+				case SYNC_CHILD_READY:
+					ready = true;
 					break;
+				default:
+					bail("unexpected sync value: %u", s);
 				}
 			}

@ -615,7 +694,8 @@ void nsexec(void)
 			enum sync_t s;

 			/* We're in a child and thus need to tell the parent if we die. */
-			syncfd = syncpipe[0];
+			syncfd = sync_child_pipe[0];
+			close(sync_child_pipe[1]);

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
@ -653,6 +733,11 @@ void nsexec(void)
 				 * clone_parent rant). So signal our parent to hook us up.
 				 */

+				/* Switching is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
+						bail("failed to set process as dumpable");
+				}
 				s = SYNC_USERMAP_PLS;
 				if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
@ -663,6 +748,11 @@ void nsexec(void)
 					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
 				if (s != SYNC_USERMAP_ACK)
 					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
+				/* Switching is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+						bail("failed to set process as dumpable");
+				}
 			}

 			/*
@ -700,6 +790,12 @@ void nsexec(void)
 				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
 			}

+			s = SYNC_CHILD_READY;
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: write(SYNC_CHILD_READY)");
+			}
+
 			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
 			exit(0);
 		}
@ -718,11 +814,19 @@ void nsexec(void)
 			enum sync_t s;

 			/* We're in a child and thus need to tell the parent if we die. */
-			syncfd = syncpipe[0];
+			syncfd = sync_grandchild_pipe[0];
+			close(sync_grandchild_pipe[1]);
+			close(sync_child_pipe[0]);
+			close(sync_child_pipe[1]);

 			/* For debugging. */
 			prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);

+			if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+				bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
+			if (s != SYNC_GRANDCHILD)
+				bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
+
 			if (setsid() < 0)
 				bail("setsid failed");

@ -732,16 +836,17 @@ void nsexec(void)
 			if (setgid(0) < 0)
 				bail("setgid failed");

-			if (setgroups(0, NULL) < 0)
-				bail("setgroups failed");
+			if (!config.is_rootless && config.is_setgroup) {
+				if (setgroups(0, NULL) < 0)
+					bail("setgroups failed");
+			}

 			s = SYNC_CHILD_READY;
 			if (write(syncfd, &s, sizeof(s)) != sizeof(s))
 				bail("failed to sync with patent: write(SYNC_CHILD_READY)");

 			/* Close sync pipes. */
-			close(syncpipe[0]);
-			close(syncpipe[1]);
+			close(sync_grandchild_pipe[0]);

 			/* Free netlink data. */
 			nl_free(&config);
@ -751,7 +856,6 @@ void nsexec(void)
 		}
 	default:
 		bail("unexpected jump value");
-		break;
 	}

 	/* Should never be reached. */
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
@ -7,8 +7,10 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
-	"syscall"
+	"syscall" // only for exec
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

 // If arg2 is nonzero, set the "child subreaper" attribute of the
@ -53,8 +55,8 @@ func Execv(cmd string, args []string, env []string) error {
 	return syscall.Exec(name, args, env)
 }

-func Prlimit(pid, resource int, limit syscall.Rlimit) error {
-	_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
+func Prlimit(pid, resource int, limit unix.Rlimit) error {
+	_, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
 	if err != 0 {
 		return err
 	}
@ -62,7 +64,7 @@ func Prlimit(pid, resource int, limit syscall.Rlimit) error {
 }

 func SetParentDeathSignal(sig uintptr) error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
 		return err
 	}
 	return nil
@ -70,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {

 func GetParentDeathSignal() (ParentDeathSignal, error) {
 	var sig int
-	_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
-	if err != 0 {
+	if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
 		return -1, err
 	}
 	return ParentDeathSignal(sig), nil
 }

 func SetKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
 		return err
 	}

@ -86,7 +87,7 @@ func SetKeepCaps() error {
 }

 func ClearKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
 		return err
 	}

@ -94,7 +95,7 @@ func ClearKeepCaps() error {
 }

 func Setctty() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
+	if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
 		return err
 	}
 	return nil
@ -131,13 +132,5 @@ func RunningInUserNS() bool {

 // SetSubreaper sets the value i as the subreaper setting for the calling process
 func SetSubreaper(i int) error {
-	return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
-}
-
-func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
-	_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
+	return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
@ -1,43 +1,113 @@
 package system

 import (
+	"fmt"
 	"io/ioutil"
 	"path/filepath"
 	"strconv"
 	"strings"
 )

-// look in /proc to find the process start time so that we can verify
-// that this pid has started after ourself
+// State is the status of a process.
+type State rune
+
+const ( // Only values for Linux 3.14 and later are listed here
+	Dead        State = 'X'
+	DiskSleep   State = 'D'
+	Running     State = 'R'
+	Sleeping    State = 'S'
+	Stopped     State = 'T'
+	TracingStop State = 't'
+	Zombie      State = 'Z'
+)
+
+// String forms of the state from proc(5)'s documentation for
+// /proc/[pid]/status' "State" field.
+func (s State) String() string {
+	switch s {
+	case Dead:
+		return "dead"
+	case DiskSleep:
+		return "disk sleep"
+	case Running:
+		return "running"
+	case Sleeping:
+		return "sleeping"
+	case Stopped:
+		return "stopped"
+	case TracingStop:
+		return "tracing stop"
+	case Zombie:
+		return "zombie"
+	default:
+		return fmt.Sprintf("unknown (%c)", s)
+	}
+}
+
+// Stat_t represents the information from /proc/[pid]/stat, as
+// described in proc(5) with names based on the /proc/[pid]/status
+// fields.
+type Stat_t struct {
+	// PID is the process ID.
+	PID uint
+
+	// Name is the command run by the process.
+	Name string
+
+	// State is the state of the process.
+	State State
+
+	// StartTime is the number of clock ticks after system boot (since
+	// Linux 2.6).
+	StartTime uint64
+}
+
+// Stat returns a Stat_t instance for the specified process.
+func Stat(pid int) (stat Stat_t, err error) {
+	bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	if err != nil {
+		return stat, err
+	}
+	return parseStat(string(bytes))
+}
+
+// GetProcessStartTime is deprecated.  Use Stat(pid) and
+// Stat_t.StartTime instead.
 func GetProcessStartTime(pid int) (string, error) {
-	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	stat, err := Stat(pid)
 	if err != nil {
 		return "", err
 	}
-	return parseStartTime(string(data))
+	return fmt.Sprintf("%d", stat.StartTime), nil
 }

-func parseStartTime(stat string) (string, error) {
-	// the starttime is located at pos 22
-	// from the man page
-	//
-	// starttime %llu (was %lu before Linux 2.6)
-	// (22)  The  time the process started after system boot.  In kernels before Linux 2.6, this
-	// value was expressed in jiffies.  Since Linux 2.6, the value is expressed in  clock  ticks
-	// (divide by sysconf(_SC_CLK_TCK)).
-	//
-	// NOTE:
-	// pos 2 could contain space and is inside `(` and `)`:
-	// (2) comm  %s
-	// The filename of the executable, in parentheses.
-	// This is visible whether or not the executable is
-	// swapped out.
-	//
-	// the following is an example:
+func parseStat(data string) (stat Stat_t, err error) {
+	// From proc(5), field 2 could contain space and is inside `(` and `)`.
+	// The following is an example:
 	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+	i := strings.LastIndex(data, ")")
+	if i <= 2 || i >= len(data)-1 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}

-	// get parts after last `)`:
-	s := strings.Split(stat, ")")
-	parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
-	return parts[22-3], nil // starts at 3 (after the filename pos `2`)
+	parts := strings.SplitN(data[:i], "(", 2)
+	if len(parts) != 2 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}
+
+	stat.Name = parts[1]
+	_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
+	if err != nil {
+		return stat, err
+	}
+
+	// parts indexes should be offset by 3 from the field number given in
+	// proc(5), because parts is zero-indexed and we've removed fields
+	// one (PID) and two (Name) in the paren-split.
+	parts = strings.Split(data[i+2:], " ")
+	var state int
+	fmt.Sscanf(parts[3-3], "%c", &state)
+	stat.State = State(state)
+	fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
+	return stat, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
@ -1,40 +0,0 @@
-package system
-
-import (
-	"fmt"
-	"runtime"
-	"syscall"
-)
-
-// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
-//
-// We need different setns values for the different platforms and arch
-// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
-var setNsMap = map[string]uintptr{
-	"linux/386":     346,
-	"linux/arm64":   268,
-	"linux/amd64":   308,
-	"linux/arm":     375,
-	"linux/ppc":     350,
-	"linux/ppc64":   350,
-	"linux/ppc64le": 350,
-	"linux/s390x":   339,
-}
-
-var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-
-func SysSetns() uint32 {
-	return uint32(sysSetns)
-}
-
-func Setns(fd uintptr, flags uintptr) error {
-	ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-	if !exists {
-		return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
-	}
-	_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
-	if err != 0 {
-		return err
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
@ -3,12 +3,12 @@
 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go
@ -1,99 +1,35 @@
 package system

-import (
-	"syscall"
-	"unsafe"
-)
-
-var _zero uintptr
-
-// Returns the size of xattrs and nil error
-// Requires path, takes allocated []byte or nil as last argument
-func Llistxattr(path string, dest []byte) (size int, err error) {
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return -1, err
-	}
-	var newpathBytes unsafe.Pointer
-	if len(dest) > 0 {
-		newpathBytes = unsafe.Pointer(&dest[0])
-	} else {
-		newpathBytes = unsafe.Pointer(&_zero)
-	}
-
-	_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
-	size = int(_size)
-	if errno != 0 {
-		return -1, errno
-	}
-
-	return size, nil
-}
+import "golang.org/x/sys/unix"

 // Returns a []byte slice if the xattr is set and nil otherwise
 // Requires path and its attribute as arguments
 func Lgetxattr(path string, attr string) ([]byte, error) {
 	var sz int
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return nil, err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return nil, err
-	}
-
 	// Start with a 128 length byte array
-	sz = 128
-	dest := make([]byte, sz)
-	destBytes := unsafe.Pointer(&dest[0])
-	_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
+	dest := make([]byte, 128)
+	sz, errno := unix.Lgetxattr(path, attr, dest)

 	switch {
-	case errno == syscall.ENODATA:
+	case errno == unix.ENODATA:
 		return nil, errno
-	case errno == syscall.ENOTSUP:
+	case errno == unix.ENOTSUP:
 		return nil, errno
-	case errno == syscall.ERANGE:
+	case errno == unix.ERANGE:
 		// 128 byte array might just not be good enough,
-		// A dummy buffer is used ``uintptr(0)`` to get real size
+		// A dummy buffer is used to get the real size
 		// of the xattrs on disk
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
-		sz = int(_sz)
-		if sz < 0 {
+		sz, errno = unix.Lgetxattr(path, attr, []byte{})
+		if errno != nil {
 			return nil, errno
 		}
 		dest = make([]byte, sz)
-		destBytes := unsafe.Pointer(&dest[0])
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
-		if errno != 0 {
+		sz, errno = unix.Lgetxattr(path, attr, dest)
+		if errno != nil {
 			return nil, errno
 		}
-	case errno != 0:
+	case errno != nil:
 		return nil, errno
 	}
-	sz = int(_sz)
 	return dest[:sz], nil
 }
-
-func Lsetxattr(path string, attr string, data []byte, flags int) error {
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return err
-	}
-	var dataBytes unsafe.Pointer
-	if len(data) > 0 {
-		dataBytes = unsafe.Pointer(&data[0])
-	} else {
-		dataBytes = unsafe.Pointer(&_zero)
-	}
-	_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
-	if errno != 0 {
-		return errno
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go
@ -2,7 +2,8 @@ package user

 import (
 	"errors"
-	"syscall"
+
+	"golang.org/x/sys/unix"
 )

 var (
@ -40,7 +41,7 @@ func lookupUser(filter func(u User) bool) (User, error) {
 // user cannot be found (or there is no /etc/passwd file on the filesystem),
 // then CurrentUser returns an error.
 func CurrentUser() (User, error) {
-	return LookupUid(syscall.Getuid())
+	return LookupUid(unix.Getuid())
 }

 // LookupUser looks up a user by their username in /etc/passwd. If the user
@ -88,7 +89,7 @@ func lookupGroup(filter func(g Group) bool) (Group, error) {
 // entry in /etc/passwd. If the group cannot be found (or there is no
 // /etc/group file on the filesystem), then CurrentGroup returns an error.
 func CurrentGroup() (Group, error) {
-	return LookupGid(syscall.Getgid())
+	return LookupGid(unix.Getgid())
 }

 // LookupGroup looks up a group by its name in /etc/group. If the group cannot
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
@ -199,18 +199,16 @@ type ExecUser struct {
 // files cannot be opened for any reason, the error is ignored and a nil
 // io.Reader is passed instead.
 func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
-	passwd, err := os.Open(passwdPath)
-	if err != nil {
-		passwd = nil
-	} else {
-		defer passwd.Close()
+	var passwd, group io.Reader
+
+	if passwdFile, err := os.Open(passwdPath); err == nil {
+		passwd = passwdFile
+		defer passwdFile.Close()
 	}

-	group, err := os.Open(groupPath)
-	if err != nil {
-		group = nil
-	} else {
-		defer group.Close()
+	if groupFile, err := os.Open(groupPath); err == nil {
+		group = groupFile
+		defer groupFile.Close()
 	}

 	return GetExecUser(userSpec, defaults, passwd, group)
@ -360,8 +358,8 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (

 				// Okay, so it's numeric. We can just roll with this.
 			}
-		} else if len(groups) > 0 {
-			// Supplementary group ids only make sense if in the implicit form.
+		} else if len(groups) > 0 && uidErr != nil {
+			// Supplementary group ids only make sense if in the implicit form for non-numeric users.
 			user.Sgids = make([]int, len(groups))
 			for i, group := range groups {
 				user.Sgids[i] = group.Gid
@ -433,9 +431,11 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
 // that opens the groupPath given and gives it as an argument to
 // GetAdditionalGroups.
 func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
-	group, err := os.Open(groupPath)
-	if err == nil {
-		defer group.Close()
+	var group io.Reader
+
+	if groupFile, err := os.Open(groupPath); err == nil {
+		group = groupFile
+		defer groupFile.Close()
 	}
 	return GetAdditionalGroups(additionalGroups, group)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
@ -1,148 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "cmsg.h"
-
-#define error(fmt, ...)							\
-	({								\
-		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
-		errno = ECOMM;						\
-		goto err; /* return value */				\
-	})
-
-/*
- * Sends a file descriptor along the sockfd provided. Returns the return
- * value of sendmsg(2). Any synchronisation and preparation of state
- * should be done external to this (we expect the other side to be in
- * recvfd() in the code).
- */
-ssize_t sendfd(int sockfd, struct file_t file)
-{
-	struct msghdr msg = {0};
-	struct iovec iov[1] = {0};
-	struct cmsghdr *cmsg;
-	int *fdptr;
-	int ret;
-
-	union {
-		char buf[CMSG_SPACE(sizeof(file.fd))];
-		struct cmsghdr align;
-	} u;
-
-	/*
-	 * We need to send some other data along with the ancillary data,
-	 * otherwise the other side won't recieve any data. This is very
-	 * well-hidden in the documentation (and only applies to
-	 * SOCK_STREAM). See the bottom part of unix(7).
-	 */
-	iov[0].iov_base = file.name;
-	iov[0].iov_len = strlen(file.name) + 1;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = u.buf;
-	msg.msg_controllen = sizeof(u.buf);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
-
-	fdptr = (int *) CMSG_DATA(cmsg);
-	memcpy(fdptr, &file.fd, sizeof(int));
-
-	return sendmsg(sockfd, &msg, 0);
-}
-
-/*
- * Receives a file descriptor from the sockfd provided. Returns the file
- * descriptor as sent from sendfd(). It will return the file descriptor
- * or die (literally) trying. Any synchronisation and preparation of
- * state should be done external to this (we expect the other side to be
- * in sendfd() in the code).
- */
-struct file_t recvfd(int sockfd)
-{
-	struct msghdr msg = {0};
-	struct iovec iov[1] = {0};
-	struct cmsghdr *cmsg;
-	struct file_t file = {0};
-	int *fdptr;
-	int olderrno;
-
-	union {
-		char buf[CMSG_SPACE(sizeof(file.fd))];
-		struct cmsghdr align;
-	} u;
-
-	/* Allocate a buffer. */
-	/* TODO: Make this dynamic with MSG_PEEK. */
-	file.name = malloc(TAG_BUFFER);
-	if (!file.name)
-		error("recvfd: failed to allocate file.tag buffer\n");
-
-	/*
-	 * We need to "recieve" the non-ancillary data even though we don't
-	 * plan to use it at all. Otherwise, things won't work as expected.
-	 * See unix(7) and other well-hidden documentation.
-	 */
-	iov[0].iov_base = file.name;
-	iov[0].iov_len = TAG_BUFFER;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = u.buf;
-	msg.msg_controllen = sizeof(u.buf);
-
-	ssize_t ret = recvmsg(sockfd, &msg, 0);
-	if (ret < 0)
-		goto err;
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	if (!cmsg)
-		error("recvfd: got NULL from CMSG_FIRSTHDR");
-	if (cmsg->cmsg_level != SOL_SOCKET)
-		error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
-	if (cmsg->cmsg_type != SCM_RIGHTS)
-		error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
-	if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
-		error("recvfd: expected correct CMSG_LEN in cmsg: %lu", (unsigned long)cmsg->cmsg_len);
-
-	fdptr = (int *) CMSG_DATA(cmsg);
-	if (!fdptr || *fdptr < 0)
-		error("recvfd: recieved invalid pointer");
-
-	file.fd = *fdptr;
-	return file;
-
-err:
-	olderrno = errno;
-	free(file.name);
-	errno = olderrno;
-	return (struct file_t){0};
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@ -3,7 +3,7 @@
 package utils

 /*
- * Copyright 2016 SUSE LLC
+ * Copyright 2016, 2017 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -18,28 +18,66 @@ package utils
 * limitations under the License.
 */

-/*
-#include <errno.h>
-#include <stdlib.h>
-#include "cmsg.h"
-*/
-import "C"
-
 import (
+	"fmt"
 	"os"
-	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

+// MaxNameLen is the maximum length of the name of a file descriptor being
+// sent using SendFd. The name of the file handle returned by RecvFd will never
+// be larger than this value.
+const MaxNameLen = 4096
+
+// oobSpace is the size of the oob slice required to store a single FD. Note
+// that unix.UnixRights appears to make the assumption that fd is always int32,
+// so sizeof(fd) = 4.
+var oobSpace = unix.CmsgSpace(4)
+
 // RecvFd waits for a file descriptor to be sent over the given AF_UNIX
 // socket. The file name of the remote file descriptor will be recreated
 // locally (it is sent as non-auxiliary data in the same payload).
 func RecvFd(socket *os.File) (*os.File, error) {
-	file, err := C.recvfd(C.int(socket.Fd()))
+	// For some reason, unix.Recvmsg uses the length rather than the capacity
+	// when passing the msg_controllen and other attributes to recvmsg.  So we
+	// have to actually set the length.
+	name := make([]byte, MaxNameLen)
+	oob := make([]byte, oobSpace)
+
+	sockfd := socket.Fd()
+	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
 	if err != nil {
 		return nil, err
 	}
-	defer C.free(unsafe.Pointer(file.name))
-	return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
+
+	if n >= MaxNameLen || oobn != oobSpace {
+		return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
+	}
+
+	// Truncate.
+	name = name[:n]
+	oob = oob[:oobn]
+
+	scms, err := unix.ParseSocketControlMessage(oob)
+	if err != nil {
+		return nil, err
+	}
+	if len(scms) != 1 {
+		return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
+	}
+	scm := scms[0]
+
+	fds, err := unix.ParseUnixRights(&scm)
+	if err != nil {
+		return nil, err
+	}
+	if len(fds) != 1 {
+		return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
+	}
+	fd := uintptr(fds[0])
+
+	return os.NewFile(fd, string(name)), nil
 }

 // SendFd sends a file descriptor over the given AF_UNIX socket. In
@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) {
 // non-auxiliary data in the same payload (allowing to send contextual
 // information for a file descriptor).
 func SendFd(socket, file *os.File) error {
-	var cfile C.struct_file_t
-	cfile.fd = C.int(file.Fd())
-	cfile.name = C.CString(file.Name())
-	defer C.free(unsafe.Pointer(cfile.name))
+	name := []byte(file.Name())
+	if len(name) >= MaxNameLen {
+		return fmt.Errorf("sendfd: filename too long: %s", file.Name())
+	}
+	oob := unix.UnixRights(int(file.Fd()))

-	_, err := C.sendfd(C.int(socket.Fd()), cfile)
-	return err
+	return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
@ -1,36 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#if !defined(CMSG_H)
-#define CMSG_H
-
-#include <sys/types.h>
-
-/* TODO: Implement this properly with MSG_PEEK. */
-#define TAG_BUFFER 4096
-
-/* This mirrors Go's (*os.File). */
-struct file_t {
-	char *name;
-	int fd;
-};
-
-struct file_t recvfd(int sockfd);
-ssize_t sendfd(int sockfd, struct file_t file);
-
-#endif /* !defined(CMSG_H) */
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@ -8,8 +8,9 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
-	"syscall"
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

 const (
@ -41,7 +42,7 @@ func ResolveRootfs(uncleanRootfs string) (string, error) {

 // ExitStatus returns the correct exit status for a process based on if it
 // was signaled or exited cleanly
-func ExitStatus(status syscall.WaitStatus) int {
+func ExitStatus(status unix.WaitStatus) int {
 	if status.Signaled() {
 		return exitSignalOffset + int(status.Signal())
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
@ -4,8 +4,10 @@ package utils

 import (
 	"io/ioutil"
+	"os"
 	"strconv"
-	"syscall"
+
+	"golang.org/x/sys/unix"
 )

 func CloseExecFrom(minFd int) error {
@ -25,9 +27,18 @@ func CloseExecFrom(minFd int) error {
 			continue
 		}

-		// intentionally ignore errors from syscall.CloseOnExec
-		syscall.CloseOnExec(fd)
+		// intentionally ignore errors from unix.CloseOnExec
+		unix.CloseOnExec(fd)
 		// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
 	}
 	return nil
 }
+
+// NewSockPair returns a new close-on-exec unix stream socket pair as files named name+"-p" (parent) and name+"-c" (child).
+func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
+	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
+}