Bump up runtime-spec dependency to v1.0.0

Signed-off-by: Mrunal Patel <mpatel@redhat.com>
This commit is contained in:
Mrunal Patel 2017-07-19 21:07:01 -07:00
parent 0eb5cd527f
commit 4128bbd7dc
83 changed files with 1020 additions and 14970 deletions

View file

@ -77,6 +77,12 @@ You can run a specific test case by setting the `TESTFLAGS` variable.
# make test TESTFLAGS="-run=SomeTestFunction"
```
### Dependencies Management
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
new dependencies.
## Using runc
### Creating an OCI Bundle
@ -111,8 +117,8 @@ Assuming you have an OCI bundle from the previous step you can execute the conta
The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits.
```bash
# run as root
cd /mycontainer
runc run mycontainerid
```
@ -159,8 +165,8 @@ Now we can go though the lifecycle operations in your shell.
```bash
# run as root
cd /mycontainer
runc create mycontainerid
# view the container is created and in the "created" state
@ -179,6 +185,22 @@ runc delete mycontainerid
This adds more complexity but allows higher level systems to manage runc and provides points in the containers creation to setup various settings after the container has created and/or before it is deleted.
This is commonly used to setup the container's network stack after `create` but before `start` where the user's defined process will be running.
#### Rootless containers
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user:
```bash
# Same as the first example
mkdir ~/mycontainer
cd ~/mycontainer
mkdir rootfs
docker export $(docker create busybox) | tar -C rootfs -xvf -
# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user.
runc spec --rootless
# The --root parameter tells runc where to store the container state. It must be writable by the user.
runc --root /tmp/runc run mycontainerid
```
#### Supervisors
`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit.

View file

@ -56,25 +56,91 @@ Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
config := &configs.Config{
Rootfs: "/your/path/to/rootfs",
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Capabilities: &configs.Capabilities{
Bounding: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Effective: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Inheritable: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Permitted: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Ambient: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
@ -112,14 +178,14 @@ config := &configs.Config{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
@ -139,7 +205,7 @@ config := &configs.Config{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
Flags: defaultMountFlags | unix.MS_RDONLY,
},
},
UidMappings: []configs.IDMap{
@ -165,7 +231,7 @@ config := &configs.Config{
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Type: unix.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},

View file

@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
@ -346,8 +332,8 @@ func removePath(p string, err error) error {
return nil
}
func CheckCpushares(path string, c int64) error {
var cpuShares int64
func CheckCpushares(path string, c uint64) error {
var cpuShares uint64
if c == 0 {
return nil

View file

@ -55,7 +55,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
@ -69,12 +69,12 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuPeriod != 0 {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
return err
}
}

View file

@ -57,10 +57,11 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
if dir == "" {
return nil
}
root, err := getCgroupRoot()
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
if err != nil {
return err
}
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
// 'ensureParent' start with parent because we don't want to
// explicitly inherit from parent, it could conflict with
// 'cpuset.cpu_exclusive'.

View file

@ -10,13 +10,19 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"
"syscall" // only for Errno
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)
type MemoryGroup struct {
}
@ -29,14 +35,18 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
path, err := d.path("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
} else if path == "" {
return nil
}
if memoryAssigned(d.config) {
if path != "" {
if _, err := os.Stat(path); os.IsNotExist(err) {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
if d.config.KernelMemory != 0 {
// Only enable kernel memory accouting when this cgroup
// is created by libcontainer, otherwise we might get
// error when people use `cgroupsPath` to join an existed
// cgroup whose kernel memory is not initialized.
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
@ -85,7 +95,7 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == syscall.EBUSY {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
@ -96,9 +106,18 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
}
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
if cgroup.Resources.Memory == -1 {
// Only set swap if it's enabled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
}
}
// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
@ -107,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap != 0 {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
@ -167,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
}
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
} else if *cgroup.Resources.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err
}
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
}
return nil
@ -224,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
}
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
value, err := getCgroupParamUint(path, useHierarchy)
if err != nil {
return err
}
if value == 1 {
stats.MemoryStats.UseHierarchy = true
}
return nil
}
@ -234,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.KernelMemoryTCP > 0 ||
cgroup.Resources.OomKillDisable ||
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {

View file

@ -51,6 +51,8 @@ type MemoryStats struct {
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
}

View file

@ -5,7 +5,6 @@ package systemd
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
@ -261,12 +260,19 @@ func (m *Manager) Apply(pid int) error {
if c.Resources.Memory != 0 {
properties = append(properties,
newProp("MemoryLimit", uint64(c.Resources.Memory)))
newProp("MemoryLimit", c.Resources.Memory))
}
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", uint64(c.Resources.CpuShares)))
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
@ -327,15 +333,6 @@ func (m *Manager) GetPaths() map[string]string {
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
path, err := getSubsystemPath(c, subsystem)
if err != nil {
@ -429,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}

View file

@ -66,6 +66,21 @@ func isSubsystemAvailable(subsystem string) bool {
return avail
}
func GetClosestMountpointAncestor(dir, mountinfo string) string {
deepestMountPoint := ""
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
mountInfoParts := strings.Fields(mountInfoEntry)
if len(mountInfoParts) < 5 {
continue
}
mountPoint := mountInfoParts[4]
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
deepestMountPoint = mountPoint
}
}
return deepestMountPoint
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@ -109,7 +124,7 @@ type Mount struct {
Subsystems []string
}
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
@ -203,8 +218,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
@ -213,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
@ -223,6 +246,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
type FreezerState string
@ -60,19 +58,19 @@ type Resources struct {
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
CpuShares uint64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
CpuPeriod uint64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_rt_period"`
CpuRtPeriod uint64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@ -114,7 +112,7 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *int64 `json:"memory_swappiness"`
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`

View file

@ -113,8 +113,8 @@ type Config struct {
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *Capabilities `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
@ -183,6 +183,9 @@ type Config struct {
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
}
type Hooks struct {
@ -197,6 +200,19 @@ type Hooks struct {
Poststop []Hook
}
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
// Effective is the set of capabilities checked by the kernel.
Effective []string
// Inheritable is the capabilities preserved across execve.
Inheritable []string
// Permitted is the limiting superset for effective capabilities.
Permitted []string
// Ambient is the ambient set of capabilities that are kept.
Ambient []string
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook

View file

@ -0,0 +1,61 @@
package configs
import "fmt"
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View file

@ -1,51 +0,0 @@
// +build freebsd linux
package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package configs
import (
@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWUSER, // Keep user NS always first, don't move it.
NEWIPC,
NEWUTS,
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}

View file

@ -2,19 +2,19 @@
package configs
import "syscall"
import "golang.org/x/sys/unix"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct

View file

@ -1,4 +1,4 @@
// +build !linux,!freebsd
// +build !linux
package configs

View file

@ -1,5 +1,3 @@
// +build linux freebsd
package devices
import (
@ -8,9 +6,11 @@ import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"syscall" //only for Stat_t
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
var (
@ -38,10 +38,10 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
case mode&os.ModeDevice == 0:
return nil, ErrNotADevice
case mode&os.ModeCharDevice != 0:
fileModePermissionBits |= syscall.S_IFCHR
fileModePermissionBits |= unix.S_IFCHR
devType = 'c'
default:
fileModePermissionBits |= syscall.S_IFBLK
fileModePermissionBits |= unix.S_IFBLK
devType = 'b'
}
stat_t, ok := fileInfo.Sys().(*syscall.Stat_t)
@ -75,7 +75,8 @@ func getDevices(path string) ([]*configs.Device, error) {
switch {
case f.IsDir():
switch f.Name() {
case "pts", "shm", "fd", "mqueue":
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))

View file

@ -1,3 +1,3 @@
// +build windows
// +build !linux
package devices

View file

@ -33,7 +33,8 @@ enum sync_t {
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
/* XXX: This doesn't help with segfaults and other such issues. */
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
@ -71,18 +72,23 @@ struct nlconfig_t {
char *namespaces;
size_t namespaces_len;
uint8_t is_setgroup;
uint8_t is_rootless;
char *oom_score_adj;
size_t oom_score_adj_len;
};
/*
* List of netlink message types sent to us as part of bootstrapping the init.
* These constants are defined in libcontainer/message_linux.go.
*/
#define INIT_MSG 62000
#define INIT_MSG 62000
#define CLONE_FLAGS_ATTR 27281
#define NS_PATHS_ATTR 27282
#define UIDMAP_ATTR 27283
#define GIDMAP_ATTR 27284
#define UIDMAP_ATTR 27283
#define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286
#define ROOTLESS_ATTR 27287
/*
* Use the raw syscall for versions of glibc which don't include a function for
@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
policy = "deny";
break;
case SETGROUPS_DEFAULT:
default:
/* Nothing to do. */
return;
}
@ -185,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
}
}
static void update_uidmap(int pid, char *map, int map_len)
static void update_uidmap(int pid, char *map, size_t map_len)
{
if (map == NULL || map_len <= 0)
return;
@ -194,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/uid_map", pid);
}
static void update_gidmap(int pid, char *map, int map_len)
static void update_gidmap(int pid, char *map, size_t map_len)
{
if (map == NULL || map_len <= 0)
return;
@ -203,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/gid_map", pid);
}
static void update_oom_score_adj(char *data, size_t len)
{
if (data == NULL || len <= 0)
return;
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
bail("failed to update /proc/self/oom_score_adj");
}
/* A dummy function that just jumps to the given jumpval. */
static int child_func(void *arg) __attribute__ ((noinline));
static int child_func(void *arg)
@ -284,7 +300,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
/* Retrieve the netlink header. */
len = read(fd, &hdr, NLMSG_HDRLEN);
if (len != NLMSG_HDRLEN)
bail("invalid netlink header length %lu", len);
bail("invalid netlink header length %zu", len);
if (hdr.nlmsg_type == NLMSG_ERROR)
bail("failed to read netlink message");
@ -300,7 +316,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
len = read(fd, data, size);
if (len != size)
bail("failed to read netlink payload, %lu != %lu", len, size);
bail("failed to read netlink payload, %zu != %zu", len, size);
/* Parse the netlink payload. */
config->data = data;
@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current);
break;
case ROOTLESS_ATTR:
config->is_rootless = readint8(current);
break;
case OOM_SCORE_ADJ_ATTR:
config->oom_score_adj = current;
config->oom_score_adj_len = payload_len;
break;
case NS_PATHS_ATTR:
config->namespaces = current;
config->namespaces_len = payload_len;
@ -413,7 +436,7 @@ void nsexec(void)
{
int pipenum;
jmp_buf env;
int syncpipe[2];
int sync_child_pipe[2], sync_grandchild_pipe[2];
struct nlconfig_t config = {0};
/*
@ -424,18 +447,43 @@ void nsexec(void)
if (pipenum == -1)
return;
/* make the process non-dumpable */
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
bail("failed to set process as non-dumpable");
}
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
/* Set oom_score_adj. This has to be done before !dumpable because
* /proc/self/oom_score_adj is not writeable unless you're an privileged
* user (if !dumpable is set). All children inherit their parent's
* oom_score_adj value on fork(2) so this will always be propagated
* properly.
*/
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
/*
* Make the process non-dumpable, to avoid various race conditions that
* could cause processes in namespaces we're joining to access host
* resources (or potentially execute code).
*
* However, if the number of namespaces we are joining is 0, we are not
* going to be switching to a different security context. Thus setting
* ourselves to be non-dumpable only breaks things (like rootless
* containers), which is the recommendation from the kernel folks.
*/
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as non-dumpable");
}
/* Pipe so we can tell the child when we've finished setting up. */
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
bail("failed to setup sync pipe between parent and child");
/*
* We need a new socketpair to sync with grandchild so we don't have
* race condition with child.
*/
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
bail("failed to setup sync pipe between parent and grandchild");
/* TODO: Currently we aren't dealing with child deaths properly. */
/*
@ -494,9 +542,10 @@ void nsexec(void)
* process.
*/
case JUMP_PARENT: {
int len, ready = 0;
int len;
pid_t child;
char buf[JSON_MAX];
bool ready = false;
/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
@ -513,30 +562,39 @@ void nsexec(void)
* ready, so we can receive all possible error codes
* generated by children.
*/
while (ready < 2) {
while (!ready) {
enum sync_t s;
int ret;
/* This doesn't need to be global, we're in the parent. */
int syncfd = syncpipe[1];
syncfd = sync_child_pipe[1];
close(sync_child_pipe[0]);
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");
switch (s) {
case SYNC_ERR: {
/* We have to mirror the error code of the child. */
int ret;
case SYNC_ERR:
/* We have to mirror the error code of the child. */
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");
exit(ret);
}
break;
exit(ret);
case SYNC_USERMAP_PLS:
/* Enable setgroups(2) if we've been asked to. */
/*
* Enable setgroups(2) if we've been asked to. But we also
* have to explicitly disable setgroups(2) if we're
* creating a rootless container (this is required since
* Linux 3.19).
*/
if (config.is_rootless && config.is_setgroup) {
kill(child, SIGKILL);
bail("cannot allow setgroup in an unprivileged user namespace setup");
}
if (config.is_setgroup)
update_setgroups(child, SETGROUPS_ALLOW);
if (config.is_rootless)
update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */
update_uidmap(child, config.uidmap, config.uidmap_len);
@ -548,11 +606,6 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
}
break;
case SYNC_USERMAP_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
break;
case SYNC_RECVPID_PLS: {
pid_t old = child;
@ -570,20 +623,46 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
}
}
ready++;
break;
case SYNC_RECVPID_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
break;
case SYNC_CHILD_READY:
ready++;
ready = true;
break;
default:
bail("unexpected sync value");
bail("unexpected sync value: %u", s);
}
}
/* Now sync with grandchild. */
ready = false;
while (!ready) {
enum sync_t s;
int ret;
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
s = SYNC_GRANDCHILD;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
}
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");
switch (s) {
case SYNC_ERR:
/* We have to mirror the error code of the child. */
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");
exit(ret);
case SYNC_CHILD_READY:
ready = true;
break;
default:
bail("unexpected sync value: %u", s);
}
}
@ -615,7 +694,8 @@ void nsexec(void)
enum sync_t s;
/* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0];
syncfd = sync_child_pipe[0];
close(sync_child_pipe[1]);
/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
@ -653,6 +733,11 @@ void nsexec(void)
* clone_parent rant). So signal our parent to hook us up.
*/
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
s = SYNC_USERMAP_PLS;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
@ -663,6 +748,11 @@ void nsexec(void)
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
if (s != SYNC_USERMAP_ACK)
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
}
/*
@ -700,6 +790,12 @@ void nsexec(void)
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
}
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
}
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
exit(0);
}
@ -718,11 +814,19 @@ void nsexec(void)
enum sync_t s;
/* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0];
syncfd = sync_grandchild_pipe[0];
close(sync_grandchild_pipe[1]);
close(sync_child_pipe[0]);
close(sync_child_pipe[1]);
/* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
if (s != SYNC_GRANDCHILD)
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
if (setsid() < 0)
bail("setsid failed");
@ -732,16 +836,17 @@ void nsexec(void)
if (setgid(0) < 0)
bail("setgid failed");
if (setgroups(0, NULL) < 0)
bail("setgroups failed");
if (!config.is_rootless && config.is_setgroup) {
if (setgroups(0, NULL) < 0)
bail("setgroups failed");
}
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
/* Close sync pipes. */
close(syncpipe[0]);
close(syncpipe[1]);
close(sync_grandchild_pipe[0]);
/* Free netlink data. */
nl_free(&config);
@ -751,7 +856,6 @@ void nsexec(void)
}
default:
bail("unexpected jump value");
break;
}
/* Should never be reached. */

View file

@ -7,8 +7,10 @@ import (
"fmt"
"os"
"os/exec"
"syscall"
"syscall" // only for exec
"unsafe"
"golang.org/x/sys/unix"
)
// If arg2 is nonzero, set the "child subreaper" attribute of the
@ -53,8 +55,8 @@ func Execv(cmd string, args []string, env []string) error {
return syscall.Exec(name, args, env)
}
func Prlimit(pid, resource int, limit syscall.Rlimit) error {
_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
func Prlimit(pid, resource int, limit unix.Rlimit) error {
_, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
if err != 0 {
return err
}
@ -62,7 +64,7 @@ func Prlimit(pid, resource int, limit syscall.Rlimit) error {
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
return err
}
return nil
@ -70,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
if err != 0 {
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
return err
}
@ -86,7 +87,7 @@ func SetKeepCaps() error {
}
func ClearKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
return err
}
@ -94,7 +95,7 @@ func ClearKeepCaps() error {
}
func Setctty() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
return err
}
return nil
@ -131,13 +132,5 @@ func RunningInUserNS() bool {
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if e1 != 0 {
err = e1
}
return
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}

View file

@ -1,43 +1,113 @@
package system
import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// look in /proc to find the process start time so that we can verify
// that this pid has started after ourself
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// PID is the process ID.
PID uint
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
}
// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return stat, err
}
return parseStat(string(bytes))
}
// GetProcessStartTime is deprecated. Use Stat(pid) and
// Stat_t.StartTime instead.
func GetProcessStartTime(pid int) (string, error) {
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
stat, err := Stat(pid)
if err != nil {
return "", err
}
return parseStartTime(string(data))
return fmt.Sprintf("%d", stat.StartTime), nil
}
func parseStartTime(stat string) (string, error) {
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
//
// NOTE:
// pos 2 could contain space and is inside `(` and `)`:
// (2) comm %s
// The filename of the executable, in parentheses.
// This is visible whether or not the executable is
// swapped out.
//
// the following is an example:
func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
// get parts after last `)`:
s := strings.Split(stat, ")")
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
return parts[22-3], nil // starts at 3 (after the filename pos `2`)
parts := strings.SplitN(data[:i], "(", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
if err != nil {
return stat, err
}
// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state)
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}

View file

@ -1,40 +0,0 @@
package system
import (
"fmt"
"runtime"
"syscall"
)
// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
//
// We need different setns values for the different platforms and arch
// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
var setNsMap = map[string]uintptr{
"linux/386": 346,
"linux/arm64": 268,
"linux/amd64": 308,
"linux/arm": 375,
"linux/ppc": 350,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
}
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
func SysSetns() uint32 {
return uint32(sysSetns)
}
func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
}
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 {
return err
}
return nil
}

View file

@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View file

@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View file

@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View file

@ -1,99 +1,35 @@
package system
import (
"syscall"
"unsafe"
)
var _zero uintptr
// Returns the size of xattrs and nil error
// Requires path, takes allocated []byte or nil as last argument
func Llistxattr(path string, dest []byte) (size int, err error) {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return -1, err
}
var newpathBytes unsafe.Pointer
if len(dest) > 0 {
newpathBytes = unsafe.Pointer(&dest[0])
} else {
newpathBytes = unsafe.Pointer(&_zero)
}
_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
size = int(_size)
if errno != 0 {
return -1, errno
}
return size, nil
}
import "golang.org/x/sys/unix"
// Returns a []byte slice if the xattr is set and nil otherwise
// Requires path and its attribute as arguments
func Lgetxattr(path string, attr string) ([]byte, error) {
var sz int
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
// Start with a 128 length byte array
sz = 128
dest := make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
dest := make([]byte, 128)
sz, errno := unix.Lgetxattr(path, attr, dest)
switch {
case errno == syscall.ENODATA:
case errno == unix.ENODATA:
return nil, errno
case errno == syscall.ENOTSUP:
case errno == unix.ENOTSUP:
return nil, errno
case errno == syscall.ERANGE:
case errno == unix.ERANGE:
// 128 byte array might just not be good enough,
// A dummy buffer is used ``uintptr(0)`` to get real size
// A dummy buffer is used to get the real size
// of the xattrs on disk
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
sz = int(_sz)
if sz < 0 {
sz, errno = unix.Lgetxattr(path, attr, []byte{})
if errno != nil {
return nil, errno
}
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno != 0 {
sz, errno = unix.Lgetxattr(path, attr, dest)
if errno != nil {
return nil, errno
}
case errno != 0:
case errno != nil:
return nil, errno
}
sz = int(_sz)
return dest[:sz], nil
}
func Lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}

View file

@ -2,7 +2,8 @@ package user
import (
"errors"
"syscall"
"golang.org/x/sys/unix"
)
var (
@ -40,7 +41,7 @@ func lookupUser(filter func(u User) bool) (User, error) {
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(syscall.Getuid())
return LookupUid(unix.Getuid())
}
// LookupUser looks up a user by their username in /etc/passwd. If the user
@ -88,7 +89,7 @@ func lookupGroup(filter func(g Group) bool) (Group, error) {
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(syscall.Getgid())
return LookupGid(unix.Getgid())
}
// LookupGroup looks up a group by its name in /etc/group. If the group cannot

View file

@ -199,18 +199,16 @@ type ExecUser struct {
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil
} else {
defer passwd.Close()
var passwd, group io.Reader
if passwdFile, err := os.Open(passwdPath); err == nil {
passwd = passwdFile
defer passwdFile.Close()
}
group, err := os.Open(groupPath)
if err != nil {
group = nil
} else {
defer group.Close()
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
@ -360,8 +358,8 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
// Okay, so it's numeric. We can just roll with this.
}
} else if len(groups) > 0 {
// Supplementary group ids only make sense if in the implicit form.
} else if len(groups) > 0 && uidErr != nil {
// Supplementary group ids only make sense if in the implicit form for non-numeric users.
user.Sgids = make([]int, len(groups))
for i, group := range groups {
user.Sgids[i] = group.Gid
@ -433,9 +431,11 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
// that opens the groupPath given and gives it as an argument to
// GetAdditionalGroups.
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
group, err := os.Open(groupPath)
if err == nil {
defer group.Close()
var group io.Reader
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetAdditionalGroups(additionalGroups, group)
}

View file

@ -1,148 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include "cmsg.h"
#define error(fmt, ...) \
({ \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
errno = ECOMM; \
goto err; /* return value */ \
})
/*
* Sends a file descriptor along the sockfd provided. Returns the return
* value of sendmsg(2). Any synchronisation and preparation of state
* should be done external to this (we expect the other side to be in
* recvfd() in the code).
*/
ssize_t sendfd(int sockfd, struct file_t file)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
int *fdptr;
int ret;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/*
* We need to send some other data along with the ancillary data,
* otherwise the other side won't recieve any data. This is very
* well-hidden in the documentation (and only applies to
* SOCK_STREAM). See the bottom part of unix(7).
*/
iov[0].iov_base = file.name;
iov[0].iov_len = strlen(file.name) + 1;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
fdptr = (int *) CMSG_DATA(cmsg);
memcpy(fdptr, &file.fd, sizeof(int));
return sendmsg(sockfd, &msg, 0);
}
/*
* Receives a file descriptor from the sockfd provided. Returns the file
* descriptor as sent from sendfd(). It will return the file descriptor
* or die (literally) trying. Any synchronisation and preparation of
* state should be done external to this (we expect the other side to be
* in sendfd() in the code).
*/
struct file_t recvfd(int sockfd)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
struct file_t file = {0};
int *fdptr;
int olderrno;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/* Allocate a buffer. */
/* TODO: Make this dynamic with MSG_PEEK. */
file.name = malloc(TAG_BUFFER);
if (!file.name)
error("recvfd: failed to allocate file.tag buffer\n");
/*
* We need to "recieve" the non-ancillary data even though we don't
* plan to use it at all. Otherwise, things won't work as expected.
* See unix(7) and other well-hidden documentation.
*/
iov[0].iov_base = file.name;
iov[0].iov_len = TAG_BUFFER;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
ssize_t ret = recvmsg(sockfd, &msg, 0);
if (ret < 0)
goto err;
cmsg = CMSG_FIRSTHDR(&msg);
if (!cmsg)
error("recvfd: got NULL from CMSG_FIRSTHDR");
if (cmsg->cmsg_level != SOL_SOCKET)
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
if (cmsg->cmsg_type != SCM_RIGHTS)
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", (unsigned long)cmsg->cmsg_len);
fdptr = (int *) CMSG_DATA(cmsg);
if (!fdptr || *fdptr < 0)
error("recvfd: recieved invalid pointer");
file.fd = *fdptr;
return file;
err:
olderrno = errno;
free(file.name);
errno = olderrno;
return (struct file_t){0};
}

View file

@ -3,7 +3,7 @@
package utils
/*
* Copyright 2016 SUSE LLC
* Copyright 2016, 2017 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,28 +18,66 @@ package utils
* limitations under the License.
*/
/*
#include <errno.h>
#include <stdlib.h>
#include "cmsg.h"
*/
import "C"
import (
"fmt"
"os"
"unsafe"
"golang.org/x/sys/unix"
)
// MaxSendfdLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
file, err := C.recvfd(C.int(socket.Fd()))
// For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
if err != nil {
return nil, err
}
defer C.free(unsafe.Pointer(file.name))
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) {
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket, file *os.File) error {
var cfile C.struct_file_t
cfile.fd = C.int(file.Fd())
cfile.name = C.CString(file.Name())
defer C.free(unsafe.Pointer(cfile.name))
name := []byte(file.Name())
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", file.Name())
}
oob := unix.UnixRights(int(file.Fd()))
_, err := C.sendfd(C.int(socket.Fd()), cfile)
return err
return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
}

View file

@ -1,36 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if !defined(CMSG_H)
#define CMSG_H
#include <sys/types.h>
/* TODO: Implement this properly with MSG_PEEK. */
#define TAG_BUFFER 4096
/* This mirrors Go's (*os.File). */
struct file_t {
char *name;
int fd;
};
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
#endif /* !defined(CMSG_H) */

View file

@ -8,8 +8,9 @@ import (
"os"
"path/filepath"
"strings"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
const (
@ -41,7 +42,7 @@ func ResolveRootfs(uncleanRootfs string) (string, error) {
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status syscall.WaitStatus) int {
func ExitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}

View file

@ -4,8 +4,10 @@ package utils
import (
"io/ioutil"
"os"
"strconv"
"syscall"
"golang.org/x/sys/unix"
)
func CloseExecFrom(minFd int) error {
@ -25,9 +27,18 @@ func CloseExecFrom(minFd int) error {
continue
}
// intentionally ignore errors from syscall.CloseOnExec
syscall.CloseOnExec(fd)
// intentionally ignore errors from unix.CloseOnExec
unix.CloseOnExec(fd)
// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
}
return nil
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

21
vendor/github.com/opencontainers/runc/vendor.conf generated vendored Normal file
View file

@ -0,0 +1,21 @@
# OCI runtime-spec. When updating this, make sure you use a version tag rather
# than a commit ID so it's much more obvious what version of the spec we are
# using.
github.com/opencontainers/runtime-spec v1.0.0
# Core libcontainer functionality.
github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
github.com/opencontainers/selinux v1.0.0-rc1
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
github.com/Sirupsen/logrus 26709e2714106fb8ad40b773b711ebce25b78914
github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270
# systemd integration.
github.com/coreos/go-systemd v14
github.com/coreos/pkg v3
github.com/godbus/dbus v3
github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8
# Command-line interface.
github.com/docker/docker 0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d
github.com/docker/go-units v0.2.0
github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e
golang.org/x/sys 0e0164865330d5cf1c00247be08330bf96e2f87c https://github.com/golang/sys

View file

@ -10,7 +10,6 @@ Additional documentation about how this group operates:
- [Code of Conduct][code-of-conduct]
- [Style and Conventions](style.md)
- [Roadmap](ROADMAP.md)
- [Implementations](implementations.md)
- [Releases](RELEASES.md)
- [project](project.md)
@ -33,12 +32,7 @@ Example use cases include sophisticated network configuration, volume garbage co
### Runtime Developers
Runtime developers can build runtime implementations that run OCI-compliant bundles and container configuration, containing low-level OS and host specific details, on a particular platform.
## Releases
There is a loose [Road Map](./ROADMAP.md).
During the `0.x` series of OCI releases we make no backwards compatibility guarantees and intend to break the schema during this series.
Runtime developers can build runtime implementations that run OCI-compliant bundles and container configuration, containing low-level OS and host-specific details, on a particular platform.
## Contributing
@ -60,10 +54,16 @@ When in doubt, start on the [mailing-list](#mailing-list).
### Weekly Call
The contributors and maintainers of all OCI projects have a weekly meeting Wednesdays at 2:00 PM (USA Pacific).
Everyone is welcome to participate via [UberConference web][uberconference] or audio-only: 415-968-0849 (no PIN needed.)
The contributors and maintainers of all OCI projects have a weekly meeting on Wednesdays at:
* 8:00 AM (USA Pacific), during [odd weeks][iso-week].
* 2:00 PM (USA Pacific), during [even weeks][iso-week].
There is an [iCalendar][rfc5545] format for the meetings [here](meeting.ics).
Everyone is welcome to participate via [UberConference web][uberconference] or audio-only: +1 415 968 0849 (no PIN needed).
An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there.
Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived to the [wiki][runtime-wiki].
Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived [here][minutes], with minutes from especially old meetings (September 2015 and earlier) archived [here][runtime-wiki].
### Mailing List
@ -139,7 +139,7 @@ Read more on [How to Write a Git Commit Message][how-to-git-commit] or the Discu
5. Use the imperative mood in the subject line
6. Wrap the body at 72 characters
7. Use the body to explain what and why vs. how
* If there was important/useful/essential conversation or information, copy or include a reference
* If there was important/useful/essential conversation or information, copy or include a reference
8. When possible, one keyword to scope the change in the subject (i.e. "README: ...", "runtime: ...")
@ -148,7 +148,10 @@ Read more on [How to Write a Git Commit Message][how-to-git-commit] or the Discu
[dev-list]: https://groups.google.com/a/opencontainers.org/forum/#!forum/dev
[how-to-git-commit]: http://chris.beams.io/posts/git-commit
[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/
[iso-week]: https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_of_a_given_date
[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/
[oci]: https://www.opencontainers.org
[rfc5545]: https://tools.ietf.org/html/rfc5545
[runtime-wiki]: https://github.com/opencontainers/runtime-spec/wiki
[uberconference]: https://www.uberconference.com/opencontainers

View file

@ -6,26 +6,24 @@ import "os"
type Spec struct {
// Version of the Open Container Runtime Specification with which the bundle complies.
Version string `json:"ociVersion"`
// Platform specifies the configuration's target platform.
Platform Platform `json:"platform"`
// Process configures the container process.
Process Process `json:"process"`
Process *Process `json:"process,omitempty"`
// Root configures the container's root filesystem.
Root Root `json:"root"`
Root *Root `json:"root,omitempty"`
// Hostname configures the container's hostname.
Hostname string `json:"hostname,omitempty"`
// Mounts configures additional mounts (on top of Root).
Mounts []Mount `json:"mounts,omitempty"`
// Hooks configures callbacks for container lifecycle events.
Hooks *Hooks `json:"hooks,omitempty"`
Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"`
// Annotations contains arbitrary metadata for the container.
Annotations map[string]string `json:"annotations,omitempty"`
// Linux is platform specific configuration for Linux based containers.
// Linux is platform-specific configuration for Linux based containers.
Linux *Linux `json:"linux,omitempty" platform:"linux"`
// Solaris is platform specific configuration for Solaris containers.
// Solaris is platform-specific configuration for Solaris based containers.
Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"`
// Windows is platform specific configuration for Windows based containers, including Hyper-V containers.
// Windows is platform-specific configuration for Windows based containers.
Windows *Windows `json:"windows,omitempty" platform:"windows"`
}
@ -34,7 +32,7 @@ type Process struct {
// Terminal creates an interactive terminal for the container.
Terminal bool `json:"terminal,omitempty"`
// ConsoleSize specifies the size of the console.
ConsoleSize Box `json:"consoleSize,omitempty"`
ConsoleSize *Box `json:"consoleSize,omitempty"`
// User specifies user information for the process.
User User `json:"user"`
// Args specifies the binary and arguments for the application to execute.
@ -47,11 +45,13 @@ type Process struct {
// Capabilities are Linux capabilities that are kept for the process.
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
// Rlimits specifies rlimit options to apply to the process.
Rlimits []LinuxRlimit `json:"rlimits,omitempty" platform:"linux"`
Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"`
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"`
// ApparmorProfile specifies the apparmor profile for the container.
ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
// Specify an oom_score_adj for the container.
OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"`
// SelinuxLabel specifies the selinux context that the container process is run as.
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
}
@ -99,23 +99,13 @@ type Root struct {
Readonly bool `json:"readonly,omitempty"`
}
// Platform specifies OS and arch information for the host system that the container
// is created for.
type Platform struct {
// OS is the operating system.
OS string `json:"os"`
// Arch is the architecture
Arch string `json:"arch"`
}
// Mount specifies a mount for a container.
type Mount struct {
// Destination is the path where the mount will be placed relative to the container's root. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point.
// Destination is the absolute path where the mount will be placed in the container.
Destination string `json:"destination"`
// Type specifies the mount kind.
Type string `json:"type,omitempty"`
// Source specifies the source path of the mount. In the case of bind mounts on
// Linux based systems this would be the file on the host.
Type string `json:"type,omitempty" platform:"linux,solaris"`
// Source specifies the source path of the mount.
Source string `json:"source,omitempty"`
// Options are fstab style mount options.
Options []string `json:"options,omitempty"`
@ -132,7 +122,6 @@ type Hook struct {
// Hooks for container setup and teardown
type Hooks struct {
// Prestart is a list of hooks to be run before the container process is executed.
// On Linux, they are run after the container namespaces are created.
Prestart []Hook `json:"prestart,omitempty"`
// Poststart is a list of hooks to be run after the container process is started.
Poststart []Hook `json:"poststart,omitempty"`
@ -140,11 +129,11 @@ type Hooks struct {
Poststop []Hook `json:"poststop,omitempty"`
}
// Linux contains platform specific configuration for Linux based containers.
// Linux contains platform-specific configuration for Linux based containers.
type Linux struct {
// UIDMapping specifies user mappings for supporting user namespaces on Linux.
// UIDMapping specifies user mappings for supporting user namespaces.
UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty"`
// GIDMapping specifies group mappings for supporting user namespaces on Linux.
// GIDMapping specifies group mappings for supporting user namespaces.
GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty"`
// Sysctl are a set of key value pairs that are set for the container on start
Sysctl map[string]string `json:"sysctl,omitempty"`
@ -169,11 +158,14 @@ type Linux struct {
ReadonlyPaths []string `json:"readonlyPaths,omitempty"`
// MountLabel specifies the selinux context for the mounts in the container.
MountLabel string `json:"mountLabel,omitempty"`
// IntelRdt contains Intel Resource Director Technology (RDT) information
// for handling resource constraints (e.g., L3 cache) for the container
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
}
// LinuxNamespace is the configuration for a Linux namespace
type LinuxNamespace struct {
// Type is the type of Linux namespace
// Type is the type of namespace
Type LinuxNamespaceType `json:"type"`
// Path is a path to an existing namespace persisted on disk that can be joined
// and is of the same type
@ -210,8 +202,8 @@ type LinuxIDMapping struct {
Size uint32 `json:"size"`
}
// LinuxRlimit type and restrictions
type LinuxRlimit struct {
// POSIXRlimit type and restrictions
type POSIXRlimit struct {
// Type of the rlimit to set
Type string `json:"type"`
// Hard is the hard limit for the specified type
@ -244,12 +236,12 @@ type linuxBlockIODevice struct {
Minor int64 `json:"minor"`
}
// LinuxWeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice
type LinuxWeightDevice struct {
linuxBlockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
// Weight is the bandwidth rate for the device.
Weight *uint16 `json:"weight,omitempty"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only
LeafWeight *uint16 `json:"leafWeight,omitempty"`
}
@ -262,36 +254,38 @@ type LinuxThrottleDevice struct {
// LinuxBlockIO for Linux cgroup 'blkio' resource management
type LinuxBlockIO struct {
// Specifies per cgroup weight, range is from 10 to 1000
Weight *uint16 `json:"blkioWeight,omitempty"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"`
// Specifies per cgroup weight
Weight *uint16 `json:"weight,omitempty"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, CFQ scheduler only
LeafWeight *uint16 `json:"leafWeight,omitempty"`
// Weight per cgroup per device, can override BlkioWeight
WeightDevice []LinuxWeightDevice `json:"blkioWeightDevice,omitempty"`
WeightDevice []LinuxWeightDevice `json:"weightDevice,omitempty"`
// IO read rate limit per cgroup per device, bytes per second
ThrottleReadBpsDevice []LinuxThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"`
ThrottleReadBpsDevice []LinuxThrottleDevice `json:"throttleReadBpsDevice,omitempty"`
// IO write rate limit per cgroup per device, bytes per second
ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"`
ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"throttleWriteBpsDevice,omitempty"`
// IO read rate limit per cgroup per device, IO per second
ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"`
ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"throttleReadIOPSDevice,omitempty"`
// IO write rate limit per cgroup per device, IO per second
ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"`
ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"throttleWriteIOPSDevice,omitempty"`
}
// LinuxMemory for Linux cgroup 'memory' resource management
type LinuxMemory struct {
// Memory limit (in bytes).
Limit *uint64 `json:"limit,omitempty"`
Limit *int64 `json:"limit,omitempty"`
// Memory reservation or soft_limit (in bytes).
Reservation *uint64 `json:"reservation,omitempty"`
Reservation *int64 `json:"reservation,omitempty"`
// Total memory limit (memory + swap).
Swap *uint64 `json:"swap,omitempty"`
Swap *int64 `json:"swap,omitempty"`
// Kernel memory limit (in bytes).
Kernel *uint64 `json:"kernel,omitempty"`
Kernel *int64 `json:"kernel,omitempty"`
// Kernel memory limit for tcp (in bytes)
KernelTCP *uint64 `json:"kernelTCP,omitempty"`
// How aggressive the kernel will swap memory pages. Range from 0 to 100.
KernelTCP *int64 `json:"kernelTCP,omitempty"`
// How aggressive the kernel will swap memory pages.
Swappiness *uint64 `json:"swappiness,omitempty"`
// DisableOOMKiller disables the OOM killer for out of memory conditions
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
}
// LinuxCPU for Linux cgroup 'cpu' resource management
@ -330,10 +324,6 @@ type LinuxNetwork struct {
type LinuxResources struct {
// Devices configures the device whitelist.
Devices []LinuxDeviceCgroup `json:"devices,omitempty"`
// DisableOOMKiller disables the OOM killer for out of memory conditions
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
// Specify an oom_score_adj for the container.
OOMScoreAdj *int `json:"oomScoreAdj,omitempty"`
// Memory restriction configuration
Memory *LinuxMemory `json:"memory,omitempty"`
// CPU resource restriction configuration
@ -380,7 +370,7 @@ type LinuxDeviceCgroup struct {
Access string `json:"access,omitempty"`
}
// Solaris contains platform specific configuration for Solaris application containers.
// Solaris contains platform-specific configuration for Solaris application containers.
type Solaris struct {
// SMF FMRI which should go "online" before we start the container process.
Milestone string `json:"milestone,omitempty"`
@ -427,8 +417,20 @@ type SolarisAnet struct {
// Windows defines the runtime configuration for Windows based containers, including Hyper-V containers.
type Windows struct {
// LayerFolders contains a list of absolute paths to directories containing image layers.
LayerFolders []string `json:"layerFolders"`
// Resources contains information for handling resource constraints for the container.
Resources *WindowsResources `json:"resources,omitempty"`
// CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification.
CredentialSpec interface{} `json:"credentialSpec,omitempty"`
// Servicing indicates if the container is being started in a mode to apply a Windows Update servicing operation.
Servicing bool `json:"servicing,omitempty"`
// IgnoreFlushesDuringBoot indicates if the container is being started in a mode where disk writes are not flushed during its boot process.
IgnoreFlushesDuringBoot bool `json:"ignoreFlushesDuringBoot,omitempty"`
// HyperV contains information for running a container with Hyper-V isolation.
HyperV *WindowsHyperV `json:"hyperv,omitempty"`
// Network restriction configuration.
Network *WindowsNetwork `json:"network,omitempty"`
}
// WindowsResources has container runtime resource constraints for containers running on Windows.
@ -439,26 +441,22 @@ type WindowsResources struct {
CPU *WindowsCPUResources `json:"cpu,omitempty"`
// Storage restriction configuration.
Storage *WindowsStorageResources `json:"storage,omitempty"`
// Network restriction configuration.
Network *WindowsNetworkResources `json:"network,omitempty"`
}
// WindowsMemoryResources contains memory resource management settings.
type WindowsMemoryResources struct {
// Memory limit in bytes.
Limit *uint64 `json:"limit,omitempty"`
// Memory reservation in bytes.
Reservation *uint64 `json:"reservation,omitempty"`
}
// WindowsCPUResources contains CPU resource management settings.
type WindowsCPUResources struct {
// Number of CPUs available to the container.
Count *uint64 `json:"count,omitempty"`
// CPU shares (relative weight to other containers with cpu shares). Range is from 1 to 10000.
// CPU shares (relative weight to other containers with cpu shares).
Shares *uint16 `json:"shares,omitempty"`
// Percent of available CPUs usable by the container.
Percent *uint8 `json:"percent,omitempty"`
// Specifies the portion of processor cycles that this container can use as a percentage times 100.
Maximum *uint16 `json:"maximum,omitempty"`
}
// WindowsStorageResources contains storage resource management settings.
@ -471,17 +469,29 @@ type WindowsStorageResources struct {
SandboxSize *uint64 `json:"sandboxSize,omitempty"`
}
// WindowsNetworkResources contains network resource management settings.
type WindowsNetworkResources struct {
// EgressBandwidth is the maximum egress bandwidth in bytes per second.
EgressBandwidth *uint64 `json:"egressBandwidth,omitempty"`
// WindowsNetwork contains network settings for Windows containers.
type WindowsNetwork struct {
// List of HNS endpoints that the container should connect to.
EndpointList []string `json:"endpointList,omitempty"`
// Specifies if unqualified DNS name resolution is allowed.
AllowUnqualifiedDNSQuery bool `json:"allowUnqualifiedDNSQuery,omitempty"`
// Comma separated list of DNS suffixes to use for name resolution.
DNSSearchList []string `json:"DNSSearchList,omitempty"`
// Name (ID) of the container that we will share with the network stack.
NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"`
}
// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation.
type WindowsHyperV struct {
// UtilityVMPath is an optional path to the image used for the Utility VM.
UtilityVMPath string `json:"utilityVMPath,omitempty"`
}
// LinuxSeccomp represents syscall restrictions
type LinuxSeccomp struct {
DefaultAction LinuxSeccompAction `json:"defaultAction"`
Architectures []Arch `json:"architectures,omitempty"`
Syscalls []LinuxSyscall `json:"syscalls"`
Syscalls []LinuxSyscall `json:"syscalls,omitempty"`
}
// Arch used for additional architectures
@ -540,14 +550,21 @@ const (
type LinuxSeccompArg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"valueTwo"`
ValueTwo uint64 `json:"valueTwo,omitempty"`
Op LinuxSeccompOperator `json:"op"`
}
// LinuxSyscall is used to match a syscall in Seccomp
type LinuxSyscall struct {
Names []string `json:"names"`
Action LinuxSeccompAction `json:"action"`
Args []LinuxSeccompArg `json:"args"`
Comment string `json:"comment"`
Names []string `json:"names"`
Action LinuxSeccompAction `json:"action"`
Args []LinuxSeccompArg `json:"args,omitempty"`
}
// LinuxIntelRdt has container runtime resource constraints
// for Intel RDT/CAT which introduced in Linux 4.10 kernel
type LinuxIntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
}

View file

@ -9,7 +9,7 @@ type State struct {
// Status is the runtime status of the container.
Status string `json:"status"`
// Pid is the process ID for the container process.
Pid int `json:"pid"`
Pid int `json:"pid,omitempty"`
// Bundle is the path to the container's bundle directory.
Bundle string `json:"bundle"`
// Annotations are key values associated with the container.

View file

@ -11,7 +11,7 @@ const (
VersionPatch = 0
// VersionDev indicates development branch. Releases will be empty string.
VersionDev = "-rc5"
VersionDev = ""
)
// Version is the specification version that the package types support.

View file

@ -1,6 +1,7 @@
# oci-runtime-tool [![Build Status](https://travis-ci.org/opencontainers/runtime-tools.svg?branch=master)](https://travis-ci.org/opencontainers/runtime-tools) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runtime-tools)](https://goreportcard.com/report/github.com/opencontainers/runtime-tools)
oci-runtime-tool is a collection of tools for working with the [OCI runtime specification][runtime-spec].
To build from source code, runtime-tools requires Go 1.7.x or above.
## Generating an OCI runtime spec configuration files

View file

@ -6,7 +6,6 @@ import (
"fmt"
"io"
"os"
"runtime"
"strings"
rspec "github.com/opencontainers/runtime-spec/specs-go"
@ -35,15 +34,11 @@ type ExportOptions struct {
func New() Generator {
spec := rspec.Spec{
Version: rspec.Version,
Platform: rspec.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
Root: rspec.Root{
Root: &rspec.Root{
Path: "",
Readonly: false,
},
Process: rspec.Process{
Process: &rspec.Process{
Terminal: false,
User: rspec.User{},
Args: []string{
@ -136,7 +131,7 @@ func New() Generator {
"CAP_AUDIT_WRITE",
},
},
Rlimits: []rspec.LinuxRlimit{
Rlimits: []rspec.POSIXRlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
@ -308,13 +303,13 @@ func (g *Generator) SetVersion(version string) {
// SetRootPath sets g.spec.Root.Path.
func (g *Generator) SetRootPath(path string) {
g.initSpec()
g.initSpecRoot()
g.spec.Root.Path = path
}
// SetRootReadonly sets g.spec.Root.Readonly.
func (g *Generator) SetRootReadonly(b bool) {
g.initSpec()
g.initSpecRoot()
g.spec.Root.Readonly = b
}
@ -346,57 +341,45 @@ func (g *Generator) RemoveAnnotation(key string) {
delete(g.spec.Annotations, key)
}
// SetPlatformOS sets g.spec.Process.OS.
func (g *Generator) SetPlatformOS(os string) {
g.initSpec()
g.spec.Platform.OS = os
}
// SetPlatformArch sets g.spec.Platform.Arch.
func (g *Generator) SetPlatformArch(arch string) {
g.initSpec()
g.spec.Platform.Arch = arch
}
// SetProcessUID sets g.spec.Process.User.UID.
func (g *Generator) SetProcessUID(uid uint32) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.User.UID = uid
}
// SetProcessGID sets g.spec.Process.User.GID.
func (g *Generator) SetProcessGID(gid uint32) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.User.GID = gid
}
// SetProcessCwd sets g.spec.Process.Cwd.
func (g *Generator) SetProcessCwd(cwd string) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.Cwd = cwd
}
// SetProcessNoNewPrivileges sets g.spec.Process.NoNewPrivileges.
func (g *Generator) SetProcessNoNewPrivileges(b bool) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.NoNewPrivileges = b
}
// SetProcessTerminal sets g.spec.Process.Terminal.
func (g *Generator) SetProcessTerminal(b bool) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.Terminal = b
}
// SetProcessApparmorProfile sets g.spec.Process.ApparmorProfile.
func (g *Generator) SetProcessApparmorProfile(prof string) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.ApparmorProfile = prof
}
// SetProcessArgs sets g.spec.Process.Args.
func (g *Generator) SetProcessArgs(args []string) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.Args = args
}
@ -411,7 +394,7 @@ func (g *Generator) ClearProcessEnv() {
// AddProcessEnv adds name=value into g.spec.Process.Env, or replaces an
// existing entry with the given name.
func (g *Generator) AddProcessEnv(name, value string) {
g.initSpec()
g.initSpecProcess()
env := fmt.Sprintf("%s=%s", name, value)
for idx := range g.spec.Process.Env {
@ -425,7 +408,7 @@ func (g *Generator) AddProcessEnv(name, value string) {
// AddProcessRlimits adds rlimit into g.spec.Process.Rlimits.
func (g *Generator) AddProcessRlimits(rType string, rHard uint64, rSoft uint64) {
g.initSpec()
g.initSpecProcess()
for i, rlimit := range g.spec.Process.Rlimits {
if rlimit.Type == rType {
g.spec.Process.Rlimits[i].Hard = rHard
@ -434,7 +417,7 @@ func (g *Generator) AddProcessRlimits(rType string, rHard uint64, rSoft uint64)
}
}
newRlimit := rspec.LinuxRlimit{
newRlimit := rspec.POSIXRlimit{
Type: rType,
Hard: rHard,
Soft: rSoft,
@ -461,7 +444,7 @@ func (g *Generator) ClearProcessRlimits() {
if g.spec == nil {
return
}
g.spec.Process.Rlimits = []rspec.LinuxRlimit{}
g.spec.Process.Rlimits = []rspec.POSIXRlimit{}
}
// ClearProcessAdditionalGids clear g.spec.Process.AdditionalGids.
@ -474,7 +457,7 @@ func (g *Generator) ClearProcessAdditionalGids() {
// AddProcessAdditionalGid adds an additional gid into g.spec.Process.AdditionalGids.
func (g *Generator) AddProcessAdditionalGid(gid uint32) {
g.initSpec()
g.initSpecProcess()
for _, group := range g.spec.Process.User.AdditionalGids {
if group == gid {
return
@ -485,7 +468,7 @@ func (g *Generator) AddProcessAdditionalGid(gid uint32) {
// SetProcessSelinuxLabel sets g.spec.Process.SelinuxLabel.
func (g *Generator) SetProcessSelinuxLabel(label string) {
g.initSpec()
g.initSpecProcess()
g.spec.Process.SelinuxLabel = label
}
@ -501,16 +484,10 @@ func (g *Generator) SetLinuxMountLabel(label string) {
g.spec.Linux.MountLabel = label
}
// SetLinuxResourcesDisableOOMKiller sets g.spec.Linux.Resources.DisableOOMKiller.
func (g *Generator) SetLinuxResourcesDisableOOMKiller(disable bool) {
g.initSpecLinuxResources()
g.spec.Linux.Resources.DisableOOMKiller = &disable
}
// SetLinuxResourcesOOMScoreAdj sets g.spec.Linux.Resources.OOMScoreAdj.
func (g *Generator) SetLinuxResourcesOOMScoreAdj(adj int) {
g.initSpecLinuxResources()
g.spec.Linux.Resources.OOMScoreAdj = &adj
// SetProcessOOMScoreAdj sets g.spec.Process.OOMScoreAdj.
func (g *Generator) SetProcessOOMScoreAdj(adj int) {
g.initSpecProcess()
g.spec.Process.OOMScoreAdj = &adj
}
// SetLinuxResourcesCPUShares sets g.spec.Linux.Resources.CPU.Shares.
@ -555,32 +532,62 @@ func (g *Generator) SetLinuxResourcesCPUMems(mems string) {
g.spec.Linux.Resources.CPU.Mems = mems
}
// AddLinuxResourcesHugepageLimit adds or sets g.spec.Linux.Resources.HugepageLimits.
func (g *Generator) AddLinuxResourcesHugepageLimit(pageSize string, limit uint64) {
hugepageLimit := rspec.LinuxHugepageLimit{
Pagesize: pageSize,
Limit: limit,
}
g.initSpecLinuxResources()
for i, pageLimit := range g.spec.Linux.Resources.HugepageLimits {
if pageLimit.Pagesize == pageSize {
g.spec.Linux.Resources.HugepageLimits[i].Limit = limit
return
}
}
g.spec.Linux.Resources.HugepageLimits = append(g.spec.Linux.Resources.HugepageLimits, hugepageLimit)
}
// DropLinuxResourcesHugepageLimit drops a hugepage limit from g.spec.Linux.Resources.HugepageLimits.
func (g *Generator) DropLinuxResourcesHugepageLimit(pageSize string) error {
g.initSpecLinuxResources()
for i, pageLimit := range g.spec.Linux.Resources.HugepageLimits {
if pageLimit.Pagesize == pageSize {
g.spec.Linux.Resources.HugepageLimits = append(g.spec.Linux.Resources.HugepageLimits[:i], g.spec.Linux.Resources.HugepageLimits[i+1:]...)
return nil
}
}
return nil
}
// SetLinuxResourcesMemoryLimit sets g.spec.Linux.Resources.Memory.Limit.
func (g *Generator) SetLinuxResourcesMemoryLimit(limit uint64) {
func (g *Generator) SetLinuxResourcesMemoryLimit(limit int64) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.Limit = &limit
}
// SetLinuxResourcesMemoryReservation sets g.spec.Linux.Resources.Memory.Reservation.
func (g *Generator) SetLinuxResourcesMemoryReservation(reservation uint64) {
func (g *Generator) SetLinuxResourcesMemoryReservation(reservation int64) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.Reservation = &reservation
}
// SetLinuxResourcesMemorySwap sets g.spec.Linux.Resources.Memory.Swap.
func (g *Generator) SetLinuxResourcesMemorySwap(swap uint64) {
func (g *Generator) SetLinuxResourcesMemorySwap(swap int64) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.Swap = &swap
}
// SetLinuxResourcesMemoryKernel sets g.spec.Linux.Resources.Memory.Kernel.
func (g *Generator) SetLinuxResourcesMemoryKernel(kernel uint64) {
func (g *Generator) SetLinuxResourcesMemoryKernel(kernel int64) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.Kernel = &kernel
}
// SetLinuxResourcesMemoryKernelTCP sets g.spec.Linux.Resources.Memory.KernelTCP.
func (g *Generator) SetLinuxResourcesMemoryKernelTCP(kernelTCP uint64) {
func (g *Generator) SetLinuxResourcesMemoryKernelTCP(kernelTCP int64) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.KernelTCP = &kernelTCP
}
@ -591,6 +598,12 @@ func (g *Generator) SetLinuxResourcesMemorySwappiness(swappiness uint64) {
g.spec.Linux.Resources.Memory.Swappiness = &swappiness
}
// SetLinuxResourcesMemoryDisableOOMKiller sets g.spec.Linux.Resources.Memory.DisableOOMKiller.
func (g *Generator) SetLinuxResourcesMemoryDisableOOMKiller(disable bool) {
g.initSpecLinuxResourcesMemory()
g.spec.Linux.Resources.Memory.DisableOOMKiller = &disable
}
// SetLinuxResourcesNetworkClassID sets g.spec.Linux.Resources.Network.ClassID.
func (g *Generator) SetLinuxResourcesNetworkClassID(classid uint32) {
g.initSpecLinuxResourcesNetwork()
@ -839,6 +852,7 @@ func (g *Generator) SetupPrivileged(privileged bool) {
finalCapList = append(finalCapList, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())))
}
g.initSpecLinux()
g.initSpecProcessCapabilities()
g.spec.Process.Capabilities.Bounding = finalCapList
g.spec.Process.Capabilities.Effective = finalCapList
g.spec.Process.Capabilities.Inheritable = finalCapList
@ -869,7 +883,7 @@ func (g *Generator) AddProcessCapability(c string) error {
return err
}
g.initSpec()
g.initSpecProcessCapabilities()
for _, cap := range g.spec.Process.Capabilities.Bounding {
if strings.ToUpper(cap) == cp {
@ -916,7 +930,7 @@ func (g *Generator) DropProcessCapability(c string) error {
return err
}
g.initSpec()
g.initSpecProcessCapabilities()
for i, cap := range g.spec.Process.Capabilities.Bounding {
if strings.ToUpper(cap) == cp {
@ -968,7 +982,7 @@ func mapStrToNamespace(ns string, path string) (rspec.LinuxNamespace, error) {
case "cgroup":
return rspec.LinuxNamespace{Type: rspec.CgroupNamespace, Path: path}, nil
default:
return rspec.LinuxNamespace{}, fmt.Errorf("Should not reach here!")
return rspec.LinuxNamespace{}, fmt.Errorf("unrecognized namespace %q", ns)
}
}

View file

@ -30,8 +30,9 @@ func ParseSyscallFlag(args SyscallOpts, config *rspec.LinuxSeccomp) error {
}
action, _ := parseAction(arguments[0])
if action == config.DefaultAction {
return fmt.Errorf("default action already set as %s", action)
if action == config.DefaultAction && args.argsAreEmpty() {
// default already set, no need to make changes
return nil
}
var newSyscall rspec.LinuxSyscall
@ -96,7 +97,7 @@ func ParseDefaultAction(action string, config *rspec.LinuxSeccomp) error {
return err
}
config.DefaultAction = defaultAction
err = RemoveAllMatchingRules(config, action)
err = RemoveAllMatchingRules(config, defaultAction)
if err != nil {
return err
}
@ -125,3 +126,10 @@ func newSyscallStruct(name string, action rspec.LinuxSeccompAction, args []rspec
}
return syscallStruct
}
func (s SyscallOpts) argsAreEmpty() bool {
return (s.Index == "" &&
s.Value == "" &&
s.ValueTwo == "" &&
s.Operator == "")
}

View file

@ -15,12 +15,7 @@ func RemoveAction(arguments string, config *rspec.LinuxSeccomp) error {
return fmt.Errorf("Cannot remove action from nil Seccomp pointer")
}
var syscallsToRemove []string
if strings.Contains(arguments, ",") {
syscallsToRemove = strings.Split(arguments, ",")
} else {
syscallsToRemove = append(syscallsToRemove, arguments)
}
syscallsToRemove := strings.Split(arguments, ",")
for counter, syscallStruct := range config.Syscalls {
if reflect.DeepEqual(syscallsToRemove, syscallStruct.Names) {
@ -42,16 +37,11 @@ func RemoveAllSeccompRules(config *rspec.LinuxSeccomp) error {
}
// RemoveAllMatchingRules will remove any syscall rules that match the specified action
func RemoveAllMatchingRules(config *rspec.LinuxSeccomp, action string) error {
func RemoveAllMatchingRules(config *rspec.LinuxSeccomp, seccompAction rspec.LinuxSeccompAction) error {
if config == nil {
return fmt.Errorf("Cannot remove action from nil Seccomp pointer")
}
seccompAction, err := parseAction(action)
if err != nil {
return err
}
for _, syscall := range config.Syscalls {
if reflect.DeepEqual(syscall.Action, seccompAction) {
RemoveAction(strings.Join(syscall.Names, ","), config)

View file

@ -370,26 +370,25 @@ func DefaultProfile(rs *specs.Spec) *rspec.LinuxSeccomp {
var sysCloneFlagsIndex uint
capSysAdmin := false
var cap string
var caps []string
caps := make(map[string]bool)
for _, cap = range rs.Process.Capabilities.Bounding {
caps = append(caps, cap)
for _, cap := range rs.Process.Capabilities.Bounding {
caps[cap] = true
}
for _, cap = range rs.Process.Capabilities.Effective {
caps = append(caps, cap)
for _, cap := range rs.Process.Capabilities.Effective {
caps[cap] = true
}
for _, cap = range rs.Process.Capabilities.Inheritable {
caps = append(caps, cap)
for _, cap := range rs.Process.Capabilities.Inheritable {
caps[cap] = true
}
for _, cap = range rs.Process.Capabilities.Permitted {
caps = append(caps, cap)
for _, cap := range rs.Process.Capabilities.Permitted {
caps[cap] = true
}
for _, cap = range rs.Process.Capabilities.Ambient {
caps = append(caps, cap)
for _, cap := range rs.Process.Capabilities.Ambient {
caps[cap] = true
}
for _, cap = range caps {
for cap := range caps {
switch cap {
case "CAP_DAC_READ_SEARCH":
syscalls = append(syscalls, []rspec.LinuxSyscall{

View file

@ -10,6 +10,27 @@ func (g *Generator) initSpec() {
}
}
func (g *Generator) initSpecProcess() {
g.initSpec()
if g.spec.Process == nil {
g.spec.Process = &rspec.Process{}
}
}
func (g *Generator) initSpecProcessCapabilities() {
g.initSpecProcess()
if g.spec.Process.Capabilities == nil {
g.spec.Process.Capabilities = &rspec.LinuxCapabilities{}
}
}
func (g *Generator) initSpecRoot() {
g.initSpec()
if g.spec.Root == nil {
g.spec.Root = &rspec.Root{}
}
}
func (g *Generator) initSpecAnnotations() {
g.initSpec()
if g.spec.Annotations == nil {

View file

@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"unicode"
"unicode/utf8"
@ -47,15 +48,27 @@ type Validator struct {
spec *rspec.Spec
bundlePath string
HostSpecific bool
platform string
}
// NewValidator creates a Validator
func NewValidator(spec *rspec.Spec, bundlePath string, hostSpecific bool) Validator {
return Validator{spec: spec, bundlePath: bundlePath, HostSpecific: hostSpecific}
func NewValidator(spec *rspec.Spec, bundlePath string, hostSpecific bool, platform string) Validator {
if hostSpecific && platform != runtime.GOOS {
platform = runtime.GOOS
}
return Validator{
spec: spec,
bundlePath: bundlePath,
HostSpecific: hostSpecific,
platform: platform,
}
}
// NewValidatorFromPath creates a Validator with specified bundle path
func NewValidatorFromPath(bundlePath string, hostSpecific bool) (Validator, error) {
func NewValidatorFromPath(bundlePath string, hostSpecific bool, platform string) (Validator, error) {
if hostSpecific && platform != runtime.GOOS {
platform = runtime.GOOS
}
if bundlePath == "" {
return Validator{}, fmt.Errorf("Bundle path shouldn't be empty")
}
@ -77,18 +90,17 @@ func NewValidatorFromPath(bundlePath string, hostSpecific bool) (Validator, erro
return Validator{}, err
}
return NewValidator(&spec, bundlePath, hostSpecific), nil
return NewValidator(&spec, bundlePath, hostSpecific, platform), nil
}
// CheckAll checks all parts of runtime bundle
func (v *Validator) CheckAll() (msgs []string) {
msgs = append(msgs, v.CheckPlatform()...)
msgs = append(msgs, v.CheckRootfsPath()...)
msgs = append(msgs, v.CheckMandatoryFields()...)
msgs = append(msgs, v.CheckSemVer()...)
msgs = append(msgs, v.CheckMounts()...)
msgs = append(msgs, v.CheckPlatform()...)
msgs = append(msgs, v.CheckProcess()...)
msgs = append(msgs, v.CheckOS()...)
msgs = append(msgs, v.CheckHooks()...)
if v.spec.Linux != nil {
msgs = append(msgs, v.CheckLinux()...)
@ -131,9 +143,9 @@ func (v *Validator) CheckRootfsPath() (msgs []string) {
msgs = append(msgs, fmt.Sprintf("root.path is %q, but it MUST be a child of %q", v.spec.Root.Path, absBundlePath))
}
if v.spec.Platform.OS == "windows" {
if v.platform == "windows" {
if v.spec.Root.Readonly {
msgs = append(msgs, "root.readonly field MUST be omitted or false when platform.os is windows")
msgs = append(msgs, "root.readonly field MUST be omitted or false when target platform is windows")
}
}
@ -156,37 +168,6 @@ func (v *Validator) CheckSemVer() (msgs []string) {
return
}
// CheckPlatform checks v.spec.Platform
func (v *Validator) CheckPlatform() (msgs []string) {
logrus.Debugf("check platform")
validCombins := map[string][]string{
"android": {"arm"},
"darwin": {"386", "amd64", "arm", "arm64"},
"dragonfly": {"amd64"},
"freebsd": {"386", "amd64", "arm"},
"linux": {"386", "amd64", "arm", "arm64", "mips", "mips64", "mips64le", "mipsle", "ppc64", "ppc64le", "s390x"},
"netbsd": {"386", "amd64", "arm"},
"openbsd": {"386", "amd64", "arm"},
"plan9": {"386", "amd64"},
"solaris": {"amd64"},
"windows": {"386", "amd64"}}
platform := v.spec.Platform
for os, archs := range validCombins {
if os == platform.OS {
for _, arch := range archs {
if arch == platform.Arch {
return nil
}
}
msgs = append(msgs, fmt.Sprintf("Combination of %q and %q is invalid.", platform.OS, platform.Arch))
}
}
msgs = append(msgs, fmt.Sprintf("Operation system %q of the bundle is not supported yet.", platform.OS))
return
}
// CheckHooks check v.spec.Hooks
func (v *Validator) CheckHooks() (msgs []string) {
logrus.Debugf("check hooks")
@ -271,8 +252,7 @@ func (v *Validator) CheckProcess() (msgs []string) {
}
msgs = append(msgs, v.CheckRlimits()...)
if v.spec.Platform.OS == "linux" {
if v.platform == "linux" {
if len(process.ApparmorProfile) > 0 {
profilePath := filepath.Join(v.bundlePath, v.spec.Root.Path, "/etc/apparmor.d", process.ApparmorProfile)
_, err := os.Stat(profilePath)
@ -288,32 +268,55 @@ func (v *Validator) CheckProcess() (msgs []string) {
// CheckCapabilities checks v.spec.Process.Capabilities
func (v *Validator) CheckCapabilities() (msgs []string) {
process := v.spec.Process
if v.spec.Platform.OS == "linux" {
var caps []string
if v.platform == "linux" {
var effective, permitted, inheritable, ambient bool
caps := make(map[string][]string)
for _, cap := range process.Capabilities.Bounding {
caps = append(caps, cap)
caps[cap] = append(caps[cap], "bounding")
}
for _, cap := range process.Capabilities.Effective {
caps = append(caps, cap)
caps[cap] = append(caps[cap], "effective")
}
for _, cap := range process.Capabilities.Inheritable {
caps = append(caps, cap)
caps[cap] = append(caps[cap], "inheritable")
}
for _, cap := range process.Capabilities.Permitted {
caps = append(caps, cap)
caps[cap] = append(caps[cap], "permitted")
}
for _, cap := range process.Capabilities.Ambient {
caps = append(caps, cap)
caps[cap] = append(caps[cap], "ambient")
}
for _, capability := range caps {
for capability, owns := range caps {
if err := CapValid(capability, v.HostSpecific); err != nil {
msgs = append(msgs, fmt.Sprintf("capability %q is not valid, man capabilities(7)", capability))
}
effective, permitted, ambient, inheritable = false, false, false, false
for _, set := range owns {
if set == "effective" {
effective = true
}
if set == "inheritable" {
inheritable = true
}
if set == "permitted" {
permitted = true
}
if set == "ambient" {
ambient = true
}
}
if effective && !permitted {
msgs = append(msgs, fmt.Sprintf("effective capability %q is not allowed, as it's not permitted", capability))
}
if ambient && !(effective && inheritable) {
msgs = append(msgs, fmt.Sprintf("ambient capability %q is not allowed, as it's not permitted and inheribate", capability))
}
}
} else {
logrus.Warnf("process.capabilities validation not yet implemented for OS %q", v.spec.Platform.OS)
logrus.Warnf("process.capabilities validation not yet implemented for OS %q", v.platform)
}
return
@ -379,7 +382,7 @@ func supportedMountTypes(OS string, hostSpecific bool) (map[string]bool, error)
func (v *Validator) CheckMounts() (msgs []string) {
logrus.Debugf("check mounts")
supportedTypes, err := supportedMountTypes(v.spec.Platform.OS, v.HostSpecific)
supportedTypes, err := supportedMountTypes(v.platform, v.HostSpecific)
if err != nil {
msgs = append(msgs, err.Error())
return
@ -400,25 +403,18 @@ func (v *Validator) CheckMounts() (msgs []string) {
return
}
// CheckOS checks v.spec.Platform.OS
func (v *Validator) CheckOS() (msgs []string) {
logrus.Debugf("check os")
// CheckPlatform checks v.platform
func (v *Validator) CheckPlatform() (msgs []string) {
logrus.Debugf("check platform")
if v.spec.Platform.OS != "linux" {
if v.spec.Linux != nil {
msgs = append(msgs, fmt.Sprintf("'linux' MUST NOT be set when platform.os is %q", v.spec.Platform.OS))
}
if v.platform != "linux" && v.platform != "solaris" && v.platform != "windows" {
msgs = append(msgs, fmt.Sprintf("platform %q is not supported", v.platform))
return
}
if v.spec.Platform.OS != "solaris" {
if v.spec.Solaris != nil {
msgs = append(msgs, fmt.Sprintf("'solaris' MUST NOT be set when platform.os is %q", v.spec.Platform.OS))
}
}
if v.spec.Platform.OS != "windows" {
if v.spec.Windows != nil {
msgs = append(msgs, fmt.Sprintf("'windows' MUST NOT be set when platform.os is %q", v.spec.Platform.OS))
if v.platform == "windows" {
if v.spec.Windows == nil {
msgs = append(msgs, "'windows' MUST be set when platform is `windows`")
}
}
@ -479,7 +475,7 @@ func (v *Validator) CheckLinux() (msgs []string) {
}
}
if v.spec.Platform.OS == "linux" && !typeList[rspec.UTSNamespace].newExist && v.spec.Hostname != "" {
if v.platform == "linux" && !typeList[rspec.UTSNamespace].newExist && v.spec.Hostname != "" {
msgs = append(msgs, fmt.Sprintf("On Linux, hostname requires a new UTS namespace to be specified as well"))
}
@ -656,12 +652,12 @@ func envValid(env string) bool {
return true
}
func (v *Validator) rlimitValid(rlimit rspec.LinuxRlimit) (msgs []string) {
func (v *Validator) rlimitValid(rlimit rspec.POSIXRlimit) (msgs []string) {
if rlimit.Hard < rlimit.Soft {
msgs = append(msgs, fmt.Sprintf("hard limit of rlimit %s should not be less than soft limit", rlimit.Type))
}
if v.spec.Platform.OS == "linux" {
if v.platform == "linux" {
for _, val := range defaultRlimits {
if val == rlimit.Type {
return
@ -669,7 +665,7 @@ func (v *Validator) rlimitValid(rlimit rspec.LinuxRlimit) (msgs []string) {
}
msgs = append(msgs, fmt.Sprintf("rlimit type %q is invalid", rlimit.Type))
} else {
logrus.Warnf("process.rlimits validation not yet implemented for OS %q", v.spec.Platform.OS)
logrus.Warnf("process.rlimits validation not yet implemented for platform %q", v.platform)
}
return