Merge pull request #211 from runcom/seccomp-take-1

Add support for Seccomp
This commit is contained in:
Mrunal Patel 2016-11-28 14:48:00 -08:00 committed by GitHub
commit d6dfd0bf23
90 changed files with 4745 additions and 629 deletions

View file

@ -11,6 +11,7 @@ for d in $(find . -type d -not -iwholename '*.git*' -a -not -iname '.tool' -a -n
--exclude='duplicate of.*_test.go.*\(dupl\)$' \
--exclude='cmd\/client\/.*\.go.*\(dupl\)$' \
--exclude='vendor\/src\/.*' \
--exclude='server\/seccomp\/.*\.go.*$' \
--disable=aligncheck \
--disable=gotype \
--disable=gas \

View file

@ -13,6 +13,7 @@ BINDIR ?= ${PREFIX}/bin
LIBEXECDIR ?= ${PREFIX}/libexec
MANDIR ?= ${PREFIX}/share/man
ETCDIR ?= ${DESTDIR}/etc
ETCDIR_OCID ?= ${ETCDIR}/ocid
GO_MD2MAN ?= $(shell which go-md2man)
export GOPATH := ${CURDIR}/vendor
BUILDTAGS := selinux
@ -99,7 +100,8 @@ install:
install -d -m 755 $(MANDIR)/man{8,5}
install -m 644 $(filter %.8,$(MANPAGES)) -t $(MANDIR)/man8
install -m 644 $(filter %.5,$(MANPAGES)) -t $(MANDIR)/man5
install -D -m 644 ocid.conf $(ETCDIR)/ocid.conf
install -D -m 644 ocid.conf $(ETCDIR_OCID)/ocid.conf
install -D -m 644 seccomp.json $(ETCDIR_OCID)/seccomp.json
install.systemd:
install -D -m 644 contrib/systemd/ocid.service $(PREFIX)/lib/systemd/system/ocid.service

View file

@ -329,6 +329,8 @@ func CreateContainer(client pb.RuntimeServiceClient, opts createOptions) error {
r, err := client.CreateContainer(context.Background(), &pb.CreateContainerRequest{
PodSandboxId: &opts.podID,
Config: config,
// TODO(runcom): this is missing PodSandboxConfig!!!
// we should/could find a way to retrieve it from the fs and set it here
})
if err != nil {
return err

View file

@ -11,9 +11,10 @@ import (
)
const (
ocidRoot = "/var/lib/ocid"
conmonPath = "/usr/libexec/ocid/conmon"
pausePath = "/usr/libexec/ocid/pause"
ocidRoot = "/var/lib/ocid"
conmonPath = "/usr/libexec/ocid/conmon"
pausePath = "/usr/libexec/ocid/pause"
seccompProfilePath = "/etc/ocid/seccomp.json"
)
var commentedConfigTemplate = template.Must(template.New("config").Parse(`
@ -59,6 +60,10 @@ conmon_env = [
# on the host.
selinux = {{ .SELinux }}
# seccomp_profile is the seccomp json profile path which is used as the
# default for the runtime.
seccomp_profile = "{{ .SeccompProfile }}"
# The "ocid.image" table contains settings pertaining to the
# management of OCI images.
[ocid.image]
@ -89,7 +94,8 @@ func DefaultConfig() *server.Config {
ConmonEnv: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
},
SELinux: selinux.SelinuxEnabled(),
SELinux: selinux.SelinuxEnabled(),
SeccompProfile: seccompProfilePath,
},
ImageConfig: server.ImageConfig{
Pause: pausePath,

View file

@ -14,7 +14,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
const ociConfigPath = "/etc/ocid.conf"
const ociConfigPath = "/etc/ocid/ocid.conf"
func mergeConfig(config *server.Config, ctx *cli.Context) error {
// Don't parse the config if the user explicitly set it to "".
@ -56,6 +56,9 @@ func mergeConfig(config *server.Config, ctx *cli.Context) error {
if ctx.GlobalIsSet("selinux") {
config.SELinux = ctx.GlobalBool("selinux")
}
if ctx.GlobalIsSet("seccomp-profile") {
config.SeccompProfile = ctx.GlobalString("seccomp-profile")
}
return nil
}
@ -128,6 +131,10 @@ func main() {
Name: "sandboxdir",
Usage: "ocid pod sandbox dir",
},
cli.StringFlag{
Name: "seccomp-profile",
Usage: "default seccomp profile path",
},
cli.BoolFlag{
Name: "selinux",
Usage: "enable selinux support",

View file

@ -19,6 +19,7 @@ ocid - Enable OCI Kubernetes Container Runtime daemon
[**--runtime**=[*value*]]
[**--sandboxdir**=[*value*]]
[**--selinux**]
[**--seccomp-profile**=[*value*]]
[**--version**|**-v**]
# DESCRIPTION
@ -78,6 +79,9 @@ ocid is meant to provide an integration path between OCI conformant runtimes and
**--selinux**
Enable selinux support (default: false)
**seccomp_profile**
Path to the seccomp json profile to be used as the runtime's default (default: "/etc/ocid/seccomp.json")
**--version, -v**
Print the version

View file

@ -58,6 +58,9 @@ The `ocid` table supports the following options:
**selinux**
Enable selinux support (default: false)
**seccomp_profile**
Path to the seccomp json profile to be used as the runtime's default (default: "/etc/ocid/seccomp.json")
## OCID.IMAGE TABLE
**pause**=""

View file

@ -61,8 +61,8 @@ clone git github.com/containernetworking/cni master
clone git k8s.io/kubernetes 43110dd64d058786e975ce30d4c12a4853d1778c https://github.com/kubernetes/kubernetes
clone git google.golang.org/grpc v1.0.1-GA https://github.com/grpc/grpc-go.git
clone git github.com/opencontainers/runtime-spec bb6925ea99f0e366a3f7d1c975f6577475ca25f0
clone git github.com/docker/distribution 77b9d2997abcded79a5314970fe69a44c93c25fb
clone git github.com/vbatts/tar-split v0.9.11
clone git github.com/docker/distribution d22e09a6686c32be8c17b684b639da4b90efe320
clone git github.com/vbatts/tar-split v0.10.1
clone git github.com/docker/go-units f2145db703495b2e525c59662db69a7344b00bb8
clone git github.com/docker/go-connections 988efe982fdecb46f01d53465878ff1f2ff411ce
clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
@ -77,5 +77,6 @@ clone git github.com/syndtr/gocapability 2c00daeb6c3b45114c80ac44119e7b8801fdd85
clone git github.com/gogo/protobuf 43a2e0b1c32252bfbbdf81f7faa7a88fb3fa4028
clone git github.com/gorilla/context v1.1
clone git golang.org/x/sys 8f0908ab3b2457e2e15403d3697c9ef5cb4b57a9 https://github.com/golang/sys.git
clone git github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
clean

701
seccomp.json Normal file
View file

@ -0,0 +1,701 @@
{
"defaultAction": "SCMP_ACT_ERRNO",
"archMap": [
{
"architecture": "SCMP_ARCH_X86_64",
"subArchitectures": [
"SCMP_ARCH_X86",
"SCMP_ARCH_X32"
]
},
{
"architecture": "SCMP_ARCH_AARCH64",
"subArchitectures": [
"SCMP_ARCH_ARM"
]
},
{
"architecture": "SCMP_ARCH_MIPS64",
"subArchitectures": [
"SCMP_ARCH_MIPS",
"SCMP_ARCH_MIPS64N32"
]
},
{
"architecture": "SCMP_ARCH_MIPS64N32",
"subArchitectures": [
"SCMP_ARCH_MIPS",
"SCMP_ARCH_MIPS64"
]
},
{
"architecture": "SCMP_ARCH_MIPSEL64",
"subArchitectures": [
"SCMP_ARCH_MIPSEL",
"SCMP_ARCH_MIPSEL64N32"
]
},
{
"architecture": "SCMP_ARCH_MIPSEL64N32",
"subArchitectures": [
"SCMP_ARCH_MIPSEL",
"SCMP_ARCH_MIPSEL64"
]
},
{
"architecture": "SCMP_ARCH_S390X",
"subArchitectures": [
"SCMP_ARCH_S390"
]
}
],
"syscalls": [
{
"names": [
"accept",
"accept4",
"access",
"alarm",
"alarm",
"bind",
"brk",
"capget",
"capset",
"chdir",
"chmod",
"chown",
"chown32",
"clock_getres",
"clock_gettime",
"clock_nanosleep",
"close",
"connect",
"copy_file_range",
"creat",
"dup",
"dup2",
"dup3",
"epoll_create",
"epoll_create1",
"epoll_ctl",
"epoll_ctl_old",
"epoll_pwait",
"epoll_wait",
"epoll_wait_old",
"eventfd",
"eventfd2",
"execve",
"execveat",
"exit",
"exit_group",
"faccessat",
"fadvise64",
"fadvise64_64",
"fallocate",
"fanotify_mark",
"fchdir",
"fchmod",
"fchmodat",
"fchown",
"fchown32",
"fchownat",
"fcntl",
"fcntl64",
"fdatasync",
"fgetxattr",
"flistxattr",
"flock",
"fork",
"fremovexattr",
"fsetxattr",
"fstat",
"fstat64",
"fstatat64",
"fstatfs",
"fstatfs64",
"fsync",
"ftruncate",
"ftruncate64",
"futex",
"futimesat",
"getcpu",
"getcwd",
"getdents",
"getdents64",
"getegid",
"getegid32",
"geteuid",
"geteuid32",
"getgid",
"getgid32",
"getgroups",
"getgroups32",
"getitimer",
"getpeername",
"getpgid",
"getpgrp",
"getpid",
"getppid",
"getpriority",
"getrandom",
"getresgid",
"getresgid32",
"getresuid",
"getresuid32",
"getrlimit",
"get_robust_list",
"getrusage",
"getsid",
"getsockname",
"getsockopt",
"get_thread_area",
"gettid",
"gettimeofday",
"getuid",
"getuid32",
"getxattr",
"inotify_add_watch",
"inotify_init",
"inotify_init1",
"inotify_rm_watch",
"io_cancel",
"ioctl",
"io_destroy",
"io_getevents",
"ioprio_get",
"ioprio_set",
"io_setup",
"io_submit",
"ipc",
"kill",
"lchown",
"lchown32",
"lgetxattr",
"link",
"linkat",
"listen",
"listxattr",
"llistxattr",
"_llseek",
"lremovexattr",
"lseek",
"lsetxattr",
"lstat",
"lstat64",
"madvise",
"memfd_create",
"mincore",
"mkdir",
"mkdirat",
"mknod",
"mknodat",
"mlock",
"mlock2",
"mlockall",
"mmap",
"mmap2",
"mprotect",
"mq_getsetattr",
"mq_notify",
"mq_open",
"mq_timedreceive",
"mq_timedsend",
"mq_unlink",
"mremap",
"msgctl",
"msgget",
"msgrcv",
"msgsnd",
"msync",
"munlock",
"munlockall",
"munmap",
"nanosleep",
"newfstatat",
"_newselect",
"open",
"openat",
"pause",
"pipe",
"pipe2",
"poll",
"ppoll",
"prctl",
"pread64",
"preadv",
"prlimit64",
"pselect6",
"pwrite64",
"pwritev",
"read",
"readahead",
"readlink",
"readlinkat",
"readv",
"recv",
"recvfrom",
"recvmmsg",
"recvmsg",
"remap_file_pages",
"removexattr",
"rename",
"renameat",
"renameat2",
"restart_syscall",
"rmdir",
"rt_sigaction",
"rt_sigpending",
"rt_sigprocmask",
"rt_sigqueueinfo",
"rt_sigreturn",
"rt_sigsuspend",
"rt_sigtimedwait",
"rt_tgsigqueueinfo",
"sched_getaffinity",
"sched_getattr",
"sched_getparam",
"sched_get_priority_max",
"sched_get_priority_min",
"sched_getscheduler",
"sched_rr_get_interval",
"sched_setaffinity",
"sched_setattr",
"sched_setparam",
"sched_setscheduler",
"sched_yield",
"seccomp",
"select",
"semctl",
"semget",
"semop",
"semtimedop",
"send",
"sendfile",
"sendfile64",
"sendmmsg",
"sendmsg",
"sendto",
"setfsgid",
"setfsgid32",
"setfsuid",
"setfsuid32",
"setgid",
"setgid32",
"setgroups",
"setgroups32",
"setitimer",
"setpgid",
"setpriority",
"setregid",
"setregid32",
"setresgid",
"setresgid32",
"setresuid",
"setresuid32",
"setreuid",
"setreuid32",
"setrlimit",
"set_robust_list",
"setsid",
"setsockopt",
"set_thread_area",
"set_tid_address",
"setuid",
"setuid32",
"setxattr",
"shmat",
"shmctl",
"shmdt",
"shmget",
"shutdown",
"sigaltstack",
"signalfd",
"signalfd4",
"sigreturn",
"socket",
"socketcall",
"socketpair",
"splice",
"stat",
"stat64",
"statfs",
"statfs64",
"symlink",
"symlinkat",
"sync",
"sync_file_range",
"syncfs",
"sysinfo",
"syslog",
"tee",
"tgkill",
"time",
"timer_create",
"timer_delete",
"timerfd_create",
"timerfd_gettime",
"timerfd_settime",
"timer_getoverrun",
"timer_gettime",
"timer_settime",
"times",
"tkill",
"truncate",
"truncate64",
"ugetrlimit",
"umask",
"uname",
"unlink",
"unlinkat",
"utime",
"utimensat",
"utimes",
"vfork",
"vmsplice",
"wait4",
"waitid",
"waitpid",
"write",
"writev",
"mount",
"umount2",
"reboot",
"name_to_handle_at",
"unshare"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {},
"excludes": {}
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 0,
"valueTwo": 0,
"op": "SCMP_CMP_EQ"
}
],
"comment": "",
"includes": {},
"excludes": {}
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 8,
"valueTwo": 0,
"op": "SCMP_CMP_EQ"
}
],
"comment": "",
"includes": {},
"excludes": {}
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 4294967295,
"valueTwo": 0,
"op": "SCMP_CMP_EQ"
}
],
"comment": "",
"includes": {},
"excludes": {}
},
{
"names": [
"breakpoint",
"cacheflush",
"set_tls"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"arches": [
"arm",
"arm64"
]
},
"excludes": {}
},
{
"names": [
"arch_prctl"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"arches": [
"amd64",
"x32"
]
},
"excludes": {}
},
{
"names": [
"modify_ldt"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"arches": [
"amd64",
"x32",
"x86"
]
},
"excludes": {}
},
{
"names": [
"s390_pci_mmio_read",
"s390_pci_mmio_write",
"s390_runtime_instr"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"arches": [
"s390",
"s390x"
]
},
"excludes": {}
},
{
"names": [
"open_by_handle_at"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_DAC_READ_SEARCH"
]
},
"excludes": {}
},
{
"names": [
"bpf",
"clone",
"fanotify_init",
"lookup_dcookie",
"mount",
"name_to_handle_at",
"perf_event_open",
"setdomainname",
"sethostname",
"setns",
"umount",
"umount2",
"unshare"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_ADMIN"
]
},
"excludes": {}
},
{
"names": [
"clone"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 2080505856,
"valueTwo": 0,
"op": "SCMP_CMP_MASKED_EQ"
}
],
"comment": "",
"includes": {},
"excludes": {
"caps": [
"CAP_SYS_ADMIN"
],
"arches": [
"s390",
"s390x"
]
}
},
{
"names": [
"clone"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 1,
"value": 2080505856,
"valueTwo": 0,
"op": "SCMP_CMP_MASKED_EQ"
}
],
"comment": "s390 parameter ordering for clone is different",
"includes": {
"arches": [
"s390",
"s390x"
]
},
"excludes": {
"caps": [
"CAP_SYS_ADMIN"
]
}
},
{
"names": [
"reboot"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_BOOT"
]
},
"excludes": {}
},
{
"names": [
"chroot"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_CHROOT"
]
},
"excludes": {}
},
{
"names": [
"delete_module",
"init_module",
"finit_module",
"query_module"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_MODULE"
]
},
"excludes": {}
},
{
"names": [
"acct"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_PACCT"
]
},
"excludes": {}
},
{
"names": [
"kcmp",
"process_vm_readv",
"process_vm_writev",
"ptrace"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_PTRACE"
]
},
"excludes": {}
},
{
"names": [
"iopl",
"ioperm"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_RAWIO"
]
},
"excludes": {}
},
{
"names": [
"settimeofday",
"stime",
"adjtimex"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_TIME"
]
},
"excludes": {}
},
{
"names": [
"vhangup"
],
"action": "SCMP_ACT_ALLOW",
"args": [],
"comment": "",
"includes": {
"caps": [
"CAP_SYS_TTY_CONFIG"
]
},
"excludes": {}
}
]
}

View file

@ -64,6 +64,10 @@ type RuntimeConfig struct {
// SELinux determines whether or not SELinux is used for pod separation.
SELinux bool `toml:"selinux"`
// SeccompProfile is the seccomp json profile path which is used as the
// default for the runtime.
SeccompProfile string `toml:"seccomp_profile"`
}
// ImageConfig represents the "ocid.image" TOML config table.

View file

@ -6,11 +6,13 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/kubernetes-incubator/cri-o/server/seccomp"
"github.com/kubernetes-incubator/cri-o/utils"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runtime-tools/generate"
@ -18,6 +20,12 @@ import (
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
const (
seccompUnconfined = "unconfined"
seccompRuntimeDefault = "runtime/default"
seccompLocalhostPrefix = "localhost/"
)
// CreateContainer creates a new container in specified PodSandbox
func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
logrus.Debugf("CreateContainerRequest %+v", req)
@ -73,7 +81,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
return nil, err
}
container, err := s.createSandboxContainer(containerID, containerName, sb, req.GetSandboxConfig(), containerDir, containerConfig)
container, err := s.createSandboxContainer(containerID, containerName, sb, containerDir, containerConfig)
if err != nil {
return nil, err
}
@ -101,7 +109,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
return resp, nil
}
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, SandboxConfig *pb.PodSandboxConfig, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
if sb == nil {
return nil, errors.New("createSandboxContainer needs a sandbox")
}
@ -282,6 +290,10 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
}
specgen.AddAnnotation("ocid/labels", string(labelsJSON))
if err = s.setupSeccomp(&specgen, containerName, sb.annotations); err != nil {
return nil, err
}
if err = specgen.SaveToFile(filepath.Join(containerDir, "config.json"), generate.ExportOptions{}); err != nil {
return nil, err
}
@ -310,6 +322,41 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
return container, nil
}
func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error {
profile, ok := sbAnnotations["security.alpha.kubernetes.io/seccomp/container/"+cname]
if !ok {
profile, ok = sbAnnotations["security.alpha.kubernetes.io/seccomp/pod"]
if !ok {
// running w/o seccomp, aka unconfined
profile = seccompUnconfined
}
}
if !s.seccompEnabled {
if profile != seccompUnconfined {
return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile")
}
logrus.Warn("seccomp is not enabled in your kernel, running container without profile")
}
if profile == seccompUnconfined {
// running w/o seccomp, aka unconfined
specgen.Spec().Linux.Seccomp = nil
return nil
}
if profile == seccompRuntimeDefault {
return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen)
}
if !strings.HasPrefix(profile, seccompLocalhostPrefix) {
return fmt.Errorf("unknown seccomp profile option: %q", profile)
}
//file, err := ioutil.ReadFile(filepath.Join(s.seccompProfileRoot, strings.TrimPrefix(profile, seccompLocalhostPrefix)))
//if err != nil {
//return err
//}
// TODO(runcom): setup from provided node's seccomp profile
// can't do this yet, see https://issues.k8s.io/36997
return nil
}
func (s *Server) generateContainerIDandName(podName string, name string, attempt uint32) (string, string, error) {
var (
err error

147
server/seccomp/seccomp.go Normal file
View file

@ -0,0 +1,147 @@
package seccomp
import (
"encoding/json"
"errors"
"fmt"
"github.com/docker/docker/pkg/stringutils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
libseccomp "github.com/seccomp/libseccomp-golang"
)
// LoadProfileFromStruct takes a Seccomp struct and setup seccomp in the spec.
func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error {
return setupSeccomp(&config, specgen)
}
// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile.
func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error {
var config Seccomp
if err := json.Unmarshal(body, &config); err != nil {
return fmt.Errorf("decoding seccomp profile failed: %v", err)
}
return setupSeccomp(&config, specgen)
}
var nativeToSeccomp = map[string]Arch{
"amd64": ArchX86_64,
"arm64": ArchAARCH64,
"mips64": ArchMIPS64,
"mips64n32": ArchMIPS64N32,
"mipsel64": ArchMIPSEL64,
"mipsel64n32": ArchMIPSEL64N32,
"s390x": ArchS390X,
}
func setupSeccomp(config *Seccomp, specgen *generate.Generator) error {
if config == nil {
return nil
}
// No default action specified, no syscalls listed, assume seccomp disabled
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
return nil
}
var arch string
var native, err = libseccomp.GetNativeArch()
if err == nil {
arch = native.String()
}
if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
}
customspec := specgen.Spec()
customspec.Linux.Seccomp = &specs.Seccomp{}
// if config.Architectures == 0 then libseccomp will figure out the architecture to use
if len(config.Architectures) != 0 {
for _, a := range config.Architectures {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a))
}
}
if len(config.ArchMap) != 0 {
for _, a := range config.ArchMap {
seccompArch, ok := nativeToSeccomp[arch]
if ok {
if a.Arch == seccompArch {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch))
for _, sa := range a.SubArches {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa))
}
break
}
}
}
}
customspec.Linux.Seccomp.DefaultAction = specs.Action(config.DefaultAction)
Loop:
// Loop through all syscall blocks and convert them to libcontainer format after filtering them
for _, call := range config.Syscalls {
if len(call.Excludes.Arches) > 0 {
if stringutils.InSlice(call.Excludes.Arches, arch) {
continue Loop
}
}
if len(call.Excludes.Caps) > 0 {
for _, c := range call.Excludes.Caps {
if stringutils.InSlice(customspec.Process.Capabilities, c) {
continue Loop
}
}
}
if len(call.Includes.Arches) > 0 {
if !stringutils.InSlice(call.Includes.Arches, arch) {
continue Loop
}
}
if len(call.Includes.Caps) > 0 {
for _, c := range call.Includes.Caps {
if !stringutils.InSlice(customspec.Process.Capabilities, c) {
continue Loop
}
}
}
if call.Name != "" && len(call.Names) != 0 {
return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
}
if call.Name != "" {
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
}
for _, n := range call.Names {
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
}
}
return nil
}
func createSpecsSyscall(name string, action Action, args []*Arg) specs.Syscall {
newCall := specs.Syscall{
Name: name,
Action: specs.Action(action),
}
// Loop through all the arguments of the syscall and convert them
for _, arg := range args {
newArg := specs.Arg{
Index: arg.Index,
Value: arg.Value,
ValueTwo: arg.ValueTwo,
Op: specs.Operator(arg.Op),
}
newCall.Args = append(newCall.Args, newArg)
}
return newCall
}

93
server/seccomp/types.go Normal file
View file

@ -0,0 +1,93 @@
package seccomp
// Seccomp represents the config for a seccomp profile for syscall restriction.
type Seccomp struct {
DefaultAction Action `json:"defaultAction"`
// Architectures is kept to maintain backward compatibility with the old
// seccomp profile.
Architectures []Arch `json:"architectures,omitempty"`
ArchMap []Architecture `json:"archMap,omitempty"`
Syscalls []*Syscall `json:"syscalls"`
}
// Architecture is used to represent an specific architecture
// and its sub-architectures
type Architecture struct {
Arch Arch `json:"architecture"`
SubArches []Arch `json:"subArchitectures"`
}
// Arch used for architectures
type Arch string
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
const (
ArchX86 Arch = "SCMP_ARCH_X86"
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
ArchX32 Arch = "SCMP_ARCH_X32"
ArchARM Arch = "SCMP_ARCH_ARM"
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
ArchMIPS Arch = "SCMP_ARCH_MIPS"
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
ArchPPC Arch = "SCMP_ARCH_PPC"
ArchPPC64 Arch = "SCMP_ARCH_PPC64"
ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE"
ArchS390 Arch = "SCMP_ARCH_S390"
ArchS390X Arch = "SCMP_ARCH_S390X"
)
// Action taken upon Seccomp rule match
type Action string
// Define actions for Seccomp rules
const (
ActKill Action = "SCMP_ACT_KILL"
ActTrap Action = "SCMP_ACT_TRAP"
ActErrno Action = "SCMP_ACT_ERRNO"
ActTrace Action = "SCMP_ACT_TRACE"
ActAllow Action = "SCMP_ACT_ALLOW"
)
// Operator used to match syscall arguments in Seccomp
type Operator string
// Define operators for syscall arguments in Seccomp
const (
OpNotEqual Operator = "SCMP_CMP_NE"
OpLessThan Operator = "SCMP_CMP_LT"
OpLessEqual Operator = "SCMP_CMP_LE"
OpEqualTo Operator = "SCMP_CMP_EQ"
OpGreaterEqual Operator = "SCMP_CMP_GE"
OpGreaterThan Operator = "SCMP_CMP_GT"
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
)
// Arg used for matching specific syscall arguments in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"valueTwo"`
Op Operator `json:"op"`
}
// Filter is used to conditionally apply Seccomp rules
type Filter struct {
Caps []string `json:"caps,omitempty"`
Arches []string `json:"arches,omitempty"`
}
// Syscall is used to match a group of syscalls in Seccomp
type Syscall struct {
Name string `json:"name,omitempty"`
Names []string `json:"names,omitempty"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
Comment string `json:"comment"`
Includes Filter `json:"includes"`
Excludes Filter `json:"excludes"`
}

View file

@ -7,11 +7,13 @@ import (
"os"
"path/filepath"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/registrar"
"github.com/docker/docker/pkg/truncindex"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/kubernetes-incubator/cri-o/server/seccomp"
"github.com/kubernetes-incubator/cri-o/utils"
"github.com/opencontainers/runc/libcontainer/label"
rspec "github.com/opencontainers/runtime-spec/specs-go"
@ -34,6 +36,9 @@ type Server struct {
podIDIndex *truncindex.TruncIndex
ctrNameIndex *registrar.Registrar
ctrIDIndex *truncindex.TruncIndex
seccompEnabled bool
seccompProfile seccomp.Seccomp
}
func (s *Server) loadContainer(id string) error {
@ -223,6 +228,23 @@ func (s *Server) releaseContainerName(name string) {
s.ctrNameIndex.Release(name)
}
const (
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
)
func seccompEnabled() bool {
var enabled bool
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
enabled = true
}
}
return enabled
}
// New creates a new Server with options provided
func New(config *Config) (*Server, error) {
// TODO: This will go away later when we have wrapper process or systemd acting as
@ -259,7 +281,17 @@ func New(config *Config) (*Server, error) {
sandboxes: sandboxes,
containers: containers,
},
seccompEnabled: seccompEnabled(),
}
seccompProfile, err := ioutil.ReadFile(config.SeccompProfile)
if err != nil {
return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err)
}
var seccompConfig seccomp.Seccomp
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
return nil, fmt.Errorf("decoding seccomp profile failed: %v", err)
}
s.seccompProfile = seccompConfig
s.podIDIndex = truncindex.NewTruncIndex([]string{})
s.podNameIndex = registrar.NewRegistrar()

View file

@ -17,6 +17,8 @@ OCIC_BINARY=${OCIC_BINARY:-${OCID_ROOT}/cri-o/ocic}
CONMON_BINARY=${CONMON_BINARY:-${OCID_ROOT}/cri-o/conmon/conmon}
# Path of the pause binary.
PAUSE_BINARY=${PAUSE_BINARY:-${OCID_ROOT}/cri-o/pause/pause}
# Path of the default seccomp profile
SECCOMP_PROFILE=${SECCOMP_PROFILE:-${OCID_ROOT}/cri-o/seccomp.json}
# Path of the runc binary.
RUNC_PATH=$(command -v runc || true)
RUNC_BINARY=${RUNC_PATH:-/usr/local/sbin/runc}
@ -78,7 +80,13 @@ function wait_until_reachable() {
# Start ocid.
function start_ocid() {
"$OCID_BINARY" --conmon "$CONMON_BINARY" --pause "$PAUSE_BINARY" --listen "$OCID_SOCKET" --runtime "$RUNC_BINARY" --root "$TESTDIR/ocid" --sandboxdir "$TESTDIR/sandboxes" --containerdir "$TESTDIR/ocid/containers" config >$OCID_CONFIG
"$OCID_BINARY" --conmon "$CONMON_BINARY" --pause "$PAUSE_BINARY" --listen "$OCID_SOCKET" --runtime "$RUNC_BINARY" --root "$TESTDIR/ocid" --sandboxdir "$TESTDIR/sandboxes" --containerdir "$TESTDIR/ocid/containers" --seccomp-profile "$SECCOMP_PROFILE" config >$OCID_CONFIG
"$OCID_BINARY" --debug --config "$OCID_CONFIG" & OCID_PID=$!
wait_until_reachable
}
function start_ocid_with_seccomp_path() {
"$OCID_BINARY" --conmon "$CONMON_BINARY" --pause "$PAUSE_BINARY" --listen "$OCID_SOCKET" --runtime "$RUNC_BINARY" --root "$TESTDIR/ocid" --sandboxdir "$TESTDIR/sandboxes" --containerdir "$TESTDIR/ocid/containers" --seccomp-profile "$1" config >$OCID_CONFIG
"$OCID_BINARY" --debug --config "$OCID_CONFIG" & OCID_PID=$!
wait_until_reachable
}
@ -89,7 +97,7 @@ function cleanup_ctrs() {
if [ "$output" != "" ]; then
printf '%s\n' "$output" | while IFS= read -r line
do
ocic ctr stop --id "$line"
ocic ctr stop --id "$line" || true
ocic ctr remove --id "$line"
done
fi
@ -102,7 +110,7 @@ function cleanup_pods() {
if [ "$output" != "" ]; then
printf '%s\n' "$output" | while IFS= read -r line
do
ocic pod stop --id "$line"
ocic pod stop --id "$line" || true
ocic pod remove --id "$line"
done
fi

307
test/seccomp.bats Normal file
View file

@ -0,0 +1,307 @@
#!/usr/bin/env bats
load helpers
function teardown() {
cleanup_test
}
# 1. test running with ctr unconfined
# test that we can run with a syscall which would be otherwise blocked
@test "ctr seccomp profiles unconfined" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/container\/redhat\.test\.ocid-seccomp1-1-testname-0": "unconfined"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp1.json
run ocic pod create --name seccomp1 --config "$TESTDIR"/seccomp1.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --name testname --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 2. test running with ctr runtime/default
# test that we cannot run with a syscall blocked by the default seccomp profile
@test "ctr seccomp profiles runtime/default" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/container\/redhat\.test\.ocid-seccomp2-1-testname2-0": "runtime\/default"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp2.json
run ocic pod create --name seccomp2 --config "$TESTDIR"/seccomp2.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --name testname2 --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -ne 0 ]
[[ "$output" =~ "Operation not permitted" ]]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 3. test running with ctr wrong profile name
@test "ctr seccomp profiles wrong profile name" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/container\/redhat\.test\.ocid-seccomp3-1-testname3-1": "notgood"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp3.json
run ocic pod create --name seccomp3 --config "$TESTDIR"/seccomp3.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --name testname3 --config "$TESTDATA"/container_config.json --pod "$pod_id"
echo "$output"
[ "$status" -ne 0 ]
[[ "$output" =~ "unknown seccomp profile option:" ]]
[[ "$output" =~ "notgood" ]]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# TODO(runcom): need https://issues.k8s.io/36997
# 4. test running with ctr localhost/profile_name
@test "ctr seccomp profiles localhost/profile_name" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
#sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
#sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
#sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
#start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
skip "need https://issues.k8s.io/36997"
}
# 5. test running with unkwown ctr profile falls back to pod profile
# unknown ctr -> unconfined
# pod -> runtime/default
# result: fail chmod
@test "ctr seccomp profiles falls back to pod profile" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/container\/redhat\.test\.ocid-seccomp2-1-testname2-0-not-exists": "unconfined", "security\.alpha\.kubernetes\.io\/seccomp\/pod": "runtime\/default"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp5.json
run ocic pod create --name seccomp5 --config "$TESTDIR"/seccomp5.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -ne 0 ]
[[ "$output" =~ "Operation not permitted" ]]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 6. test running with unkwown ctr profile and no pod, falls back to unconfined
# unknown ctr -> runtime/default
# pod -> NO
# result: success, running unconfined
@test "ctr seccomp profiles falls back to unconfined" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/container\/redhat\.test\.ocid-seccomp6-1-testname6-0-not-exists": "runtime-default"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp6.json
run ocic pod create --name seccomp6 --config "$TESTDIR"/seccomp6.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --name testname6 --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 1. test running with pod unconfined
# test that we can run with a syscall which would be otherwise blocked
@test "pod seccomp profiles unconfined" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/pod": "unconfined"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp1.json
run ocic pod create --name seccomp1 --config "$TESTDIR"/seccomp1.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 2. test running with pod runtime/default
# test that we cannot run with a syscall blocked by the default seccomp profile
@test "pod seccomp profiles runtime/default" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/pod": "runtime\/default"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp2.json
run ocic pod create --name seccomp2 --config "$TESTDIR"/seccomp2.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ocic ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ocic ctr execsync --id "$ctr_id" chmod 777 .
echo "$output"
[ "$status" -ne 0 ]
[[ "$output" =~ "Operation not permitted" ]]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# 3. test running with pod wrong profile name
@test "pod seccomp profiles wrong profile name" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
# 3. test running with pod wrong profile name
sed -e 's/%VALUE%/,"security\.alpha\.kubernetes\.io\/seccomp\/pod": "notgood"/g' "$TESTDATA"/sandbox_config_seccomp.json > "$TESTDIR"/seccomp3.json
run ocic pod create --name seccomp3 --config "$TESTDIR"/seccomp3.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run ocic ctr create --config "$TESTDATA"/container_config.json --pod "$pod_id"
echo "$output"
[ "$status" -ne 0 ]
[[ "$output" =~ "unknown seccomp profile option:" ]]
[[ "$output" =~ "notgood" ]]
cleanup_ctrs
cleanup_pods
stop_ocid
}
# TODO(runcom): need https://issues.k8s.io/36997
# 4. test running with pod localhost/profile_name
@test "pod seccomp profiles localhost/profile_name" {
# this test requires docker, thus it can't yet be run in a container
if [ "$TRAVIS" = "true" ]; then # instead of $TRAVIS, add a function is_containerized to skip here
skip "cannot yet run this test in a container, use sudo make localintegration"
fi
#sed -e 's/"chmod",//' "$OCID_ROOT"/cri-o/seccomp.json > "$TESTDIR"/seccomp_profile1.json
#sed -i 's/"fchmod",//' "$TESTDIR"/seccomp_profile1.json
#sed -i 's/"fchmodat",//g' "$TESTDIR"/seccomp_profile1.json
#start_ocid_with_seccomp_path "$TESTDIR"/seccomp_profile1.json
skip "need https://issues.k8s.io/36997"
}

View file

@ -0,0 +1,82 @@
{
"metadata": {
"name": "container1",
"attempt": 1
},
"image": {
"image": "docker://redis:latest"
},
"command": [
"/bin/bash"
],
"args": [
"/bin/chmod", "777", "."
],
"working_dir": "/",
"envs": [
{
"key": "PATH",
"value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
},
{
"key": "TERM",
"value": "xterm"
},
{
"key": "TESTDIR",
"value": "test/dir1"
},
{
"key": "TESTFILE",
"value": "test/file1"
}
],
"labels": {
"type": "small",
"batch": "no"
},
"annotations": {
"owner": "dragon",
"daemon": "ocid"
},
"privileged": true,
"readonly_rootfs": true,
"log_path": "container.log",
"stdin": false,
"stdin_once": false,
"tty": false,
"linux": {
"resources": {
"cpu_period": 10000,
"cpu_quota": 20000,
"cpu_shares": 512,
"memory_limit_in_bytes": 88000000,
"oom_score_adj": 30
},
"capabilities": {
"add_capabilities": [
"setuid",
"setgid"
],
"drop_capabilities": [
"audit_write",
"audit_read"
]
},
"selinux_options": {
"user": "system_u",
"role": "system_r",
"type": "svirt_lxc_net_t",
"level": "s0:c4-c5"
},
"user": {
"uid": 5,
"gid": 300,
"additional_gids": [
400,
401,
402
]
}
}
}

View file

@ -48,7 +48,8 @@
"annotations": {
"owner": "hmeng",
"security.alpha.kubernetes.io/sysctls": "kernel.shm_rmid_forced=1,net.ipv4.ip_local_port_range=1024 65000",
"security.alpha.kubernetes.io/unsafe-sysctls": "kernel.msgmax=8192"
"security.alpha.kubernetes.io/unsafe-sysctls": "kernel.msgmax=8192" ,
"security.alpha.kubernetes.io/seccomp/pod": "unconfined"
},
"linux": {
"cgroup_parent": "podsandbox1.slice:container:infra",

View file

@ -0,0 +1,60 @@
{
"metadata": {
"name": "podsandbox1",
"uid": "redhat-test-ocid",
"namespace": "redhat.test.ocid",
"attempt": 1
},
"hostname": "ocic_host",
"log_directory": ".",
"dns_options": {
"servers": [
"server1.redhat.com",
"server2.redhat.com"
],
"searches": [
"8.8.8.8"
]
},
"port_mappings": [
{
"name": "port_map1",
"protocol": 1,
"container_port": 80,
"host_port": 4888,
"host_ip": "192.168.0.33"
},
{
"name": "port_map2",
"protocol": 2,
"container_port": 81,
"host_port": 4889,
"host_ip": "192.168.0.33"
}
],
"resources": {
"cpu": {
"limits": 3,
"requests": 2
},
"memory": {
"limits": 50000000,
"requests": 2000000
}
},
"labels": {
"group": "test"
},
"annotations": {
"owner": "hmeng"
%VALUE%
},
"linux": {
"cgroup_parent": "podsandbox1.slice:container:infra",
"namespace_options": {
"host_network": false,
"host_pid": false,
"host_ipc": false
}
}
}

View file

@ -1,6 +1,8 @@
a-palchikov <deemok@gmail.com>
Aaron Lehmann <aaron.lehmann@docker.com>
Aaron Schlesinger <aschlesinger@deis.com>
Aaron Vinson <avinson.public@gmail.com>
Adam Duke <adam.v.duke@gmail.com>
Adam Enger <adamenger@gmail.com>
Adrian Mouat <adrian.mouat@gmail.com>
Ahmet Alp Balkan <ahmetalpbalkan@gmail.com>
@ -19,6 +21,7 @@ Anis Elleuch <vadmeste@gmail.com>
Anton Tiurin <noxiouz@yandex.ru>
Antonio Mercado <amercado@thinknode.com>
Antonio Murdaca <runcom@redhat.com>
Anusha Ragunathan <anusha@docker.com>
Arien Holthuizen <aholthuizen@schubergphilis.com>
Arnaud Porterie <arnaud.porterie@docker.com>
Arthur Baars <arthur@semmle.com>
@ -26,12 +29,16 @@ Asuka Suzuki <hello@tanksuzuki.com>
Avi Miller <avi.miller@oracle.com>
Ayose Cazorla <ayosec@gmail.com>
BadZen <dave.trombley@gmail.com>
Ben Bodenmiller <bbodenmiller@hotmail.com>
Ben Firshman <ben@firshman.co.uk>
bin liu <liubin0329@gmail.com>
Brian Bland <brian.bland@docker.com>
burnettk <burnettk@gmail.com>
Carson A <ca@carsonoid.net>
Cezar Sa Espinola <cezarsa@gmail.com>
Charles Smith <charles.smith@docker.com>
Chris Dillon <squarism@gmail.com>
cuiwei13 <cuiwei13@pku.edu.cn>
cyli <cyli@twistedmatrix.com>
Daisuke Fujita <dtanshi45@gmail.com>
Daniel Huhn <daniel@danielhuhn.de>
@ -48,11 +55,14 @@ Diogo Mónica <diogo.monica@gmail.com>
DJ Enriquez <dj.enriquez@infospace.com>
Donald Huang <don.hcd@gmail.com>
Doug Davis <dug@us.ibm.com>
Edgar Lee <edgar.lee@docker.com>
Eric Yang <windfarer@gmail.com>
Fabio Berchtold <jamesclonk@jamesclonk.ch>
Fabio Huser <fabio@fh1.ch>
farmerworking <farmerworking@gmail.com>
Felix Yan <felixonmars@archlinux.org>
Florentin Raud <florentin.raud@gmail.com>
Frank Chen <frankchn@gmail.com>
Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
gabriell nascimento <gabriell@bluesoft.com.br>
Gleb Schukin <gschukin@ptsecurity.com>
@ -64,16 +74,23 @@ HuKeping <hukeping@huawei.com>
Ian Babrou <ibobrik@gmail.com>
igayoso <igayoso@gmail.com>
Jack Griffin <jackpg14@gmail.com>
James Findley <jfindley@fastmail.com>
Jason Freidman <jason.freidman@gmail.com>
Jason Heiss <jheiss@aput.net>
Jeff Nickoloff <jeff@allingeek.com>
Jess Frazelle <acidburn@google.com>
Jessie Frazelle <jessie@docker.com>
jhaohai <jhaohai@foxmail.com>
Jianqing Wang <tsing@jianqing.org>
Jihoon Chung <jihoon@gmail.com>
Joao Fernandes <joao.fernandes@docker.com>
John Mulhausen <john@docker.com>
John Starks <jostarks@microsoft.com>
Jon Johnson <jonjohnson@google.com>
Jon Poler <jonathan.poler@apcera.com>
Jonathan Boulle <jonathanboulle@gmail.com>
Jordan Liggitt <jliggitt@redhat.com>
Josh Chorlton <josh.chorlton@docker.com>
Josh Hawn <josh.hawn@docker.com>
Julien Fernandez <julien.fernandez@gmail.com>
Ke Xu <leonhartx.k@gmail.com>
@ -84,22 +101,30 @@ Kenny Leung <kleung@google.com>
Li Yi <denverdino@gmail.com>
Liu Hua <sdu.liu@huawei.com>
liuchang0812 <liuchang0812@gmail.com>
Lloyd Ramey <lnr0626@gmail.com>
Louis Kottmann <louis.kottmann@gmail.com>
Luke Carpenter <x@rubynerd.net>
Marcus Martins <marcus@docker.com>
Mary Anthony <mary@docker.com>
Matt Bentley <mbentley@mbentley.net>
Matt Duch <matt@learnmetrics.com>
Matt Moore <mattmoor@google.com>
Matt Robenolt <matt@ydekproductions.com>
Matthew Green <greenmr@live.co.uk>
Michael Prokop <mika@grml.org>
Michal Minar <miminar@redhat.com>
Michal Minář <miminar@redhat.com>
Mike Brown <brownwm@us.ibm.com>
Miquel Sabaté <msabate@suse.com>
Misty Stanley-Jones <misty@apache.org>
Misty Stanley-Jones <misty@docker.com>
Morgan Bauer <mbauer@us.ibm.com>
moxiegirl <mary@docker.com>
Nathan Sullivan <nathan@nightsys.net>
nevermosby <robolwq@qq.com>
Nghia Tran <tcnghia@gmail.com>
Nikita Tarasov <nikita@mygento.ru>
Noah Treuhaft <noah.treuhaft@docker.com>
Nuutti Kotivuori <nuutti.kotivuori@poplatek.fi>
Oilbeater <liumengxinfly@gmail.com>
Olivier Gambier <olivier@docker.com>
@ -108,17 +133,23 @@ Omer Cohen <git@omer.io>
Patrick Devine <patrick.devine@docker.com>
Phil Estes <estesp@linux.vnet.ibm.com>
Philip Misiowiec <philip@atlashealth.com>
Pierre-Yves Ritschard <pyr@spootnik.org>
Qiao Anran <qiaoanran@gmail.com>
Randy Barlow <randy@electronsweatshop.com>
Richard Scothern <richard.scothern@docker.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Rusty Conover <rusty@luckydinosaur.com>
Sean Boran <Boran@users.noreply.github.com>
Sebastiaan van Stijn <github@gone.nl>
Sebastien Coavoux <s.coavoux@free.fr>
Serge Dubrouski <sergeyfd@gmail.com>
Sharif Nassar <sharif@mrwacky.com>
Shawn Falkner-Horine <dreadpirateshawn@gmail.com>
Shreyas Karnik <karnik.shreyas@gmail.com>
Simon Thulbourn <simon+github@thulbourn.com>
spacexnice <yaoyao.xyy@alibaba-inc.com>
Spencer Rinehart <anubis@overthemonkey.com>
Stan Hu <stanhu@gmail.com>
Stefan Majewsky <stefan.majewsky@sap.com>
Stefan Weil <sw@weilnetz.de>
Stephen J Day <stephen.day@docker.com>
@ -134,6 +165,8 @@ Tonis Tiigi <tonistiigi@gmail.com>
Tony Holdstock-Brown <tony@docker.com>
Trevor Pounds <trevor.pounds@gmail.com>
Troels Thomsen <troels@thomsen.io>
Victor Vieux <vieux@docker.com>
Victoria Bialas <victoria.bialas@docker.com>
Vincent Batts <vbatts@redhat.com>
Vincent Demeester <vincent@sbr.pm>
Vincent Giersch <vincent.giersch@ovh.net>
@ -142,6 +175,8 @@ weiyuan.yl <weiyuan.yl@alibaba-inc.com>
xg.song <xg.song@venusource.com>
xiekeyang <xiekeyang@huawei.com>
Yann ROBERT <yann.robert@anantaplex.fr>
yaoyao.xyy <yaoyao.xyy@alibaba-inc.com>
yuexiao-wang <wang.yuexiao@zte.com.cn>
yuzou <zouyu7@huawei.com>
zhouhaibing089 <zhouhaibing089@gmail.com>
姜继忠 <jizhong.jiangjz@alibaba-inc.com>

View file

@ -11,7 +11,7 @@ Most people should use the [official Registry docker image](https://hub.docker.c
People looking for advanced operational use cases might consider rolling their own image with a custom Dockerfile inheriting `FROM registry:2`.
OS X users who want to run natively can do so following [the instructions here](osx-setup-guide.md).
OS X users who want to run natively can do so following [the instructions here](https://github.com/docker/docker.github.io/blob/master/registry/recipes/osx-setup-guide.md).
### Gotchas

View file

@ -1,9 +1,80 @@
# Changelog
## 2.6.0-rc1 (2016-10-10)
#### Storage
- S3: fixed bug in delete due to read-after-write inconsistency
- S3: allow EC2 IAM roles to be used when authorizing region endpoints
- S3: add Object ACL Support
- S3: fix delete method's notion of subpaths
- S3: use multipart upload API in `Move` method for performance
- S3: add v2 signature signing for legacy S3 clones
- Swift: add simple heuristic to detect incomplete DLOs during read ops
- Swift: support different user and tenant domains
- Swift: bulk deletes in chunks
- Aliyun OSS: fix delete method's notion of subpaths
- Aliyun OSS: optimize data copy after upload finishes
- Azure: close leaking response body
- Fix storage drivers dropping non-EOF errors when listing repositories
- Compare path properly when listing repositories in catalog
- Add a foreign layer URL host whitelist
- Improve catalog enumerate runtime
#### Registry
- Override media type returned from `Stat()` for existing manifests
- Export `storage.CreateOptions` in top-level package
- Enable notifications to endpoints that use self-signed certificates
- Properly validate multi-URL foreign layers
- Add control over validation of URLs in pushed manifests
- Proxy mode: fix socket leak when pull is cancelled
- Tag service: properly handle error responses on HEAD request
- Support for custom authentication URL in proxying registry
- Add configuration option to disable access logging
- Add notification filtering by target media type
- Manifest: `References()` returns all children
- Honor `X-Forwarded-Port` and Forwarded headers
- Reference: Preserve tag and digest in With* functions
#### Client
- Changes the client Tags `All()` method to follow links
- Allow registry clients to connect via HTTP2
- Better handling of OAuth errors in client
#### Spec
- Manifest: clarify relationship between urls and foreign layers
#### Manifest
- Add plugin mediatype to distribution manifest
#### Docs
- Document `TOOMANYREQUESTS` error code
- Document required Let's Encrypt port
- Improve documentation around implementation of OAuth2
#### Auth
- Add support for registry type in scope
- Add support for using v2 ping challenges for v1
- Add leeway to JWT `nbf` and `exp` checking
- htpasswd: dynamically parse htpasswd file
- Fix missing auth headers with PATCH HTTP request when pushing to default port
#### Dockerfile
- Update to go1.7
- Reorder Dockerfile steps for better layer caching
#### Notes
Documentation has moved to the documentation repository at
`github.com/docker/docker.github.io/tree/master/registry`
The registry is go 1.7 compliant, and passes newer, more restrictive `lint` and `vet` ing.
## 2.5.0 (2016-06-14)
### Storage
- Ensure uploads directory is cleaned after upload is commited
#### Storage
- Ensure uploads directory is cleaned after upload is committed
- Add ability to cap concurrent operations in filesystem driver
- S3: Add 'us-gov-west-1' to the valid region list
- Swift: Handle ceph not returning Last-Modified header for HEAD requests
@ -23,13 +94,13 @@
- Update the auth spec scope grammar to reflect the fact that hostnames are optionally supported
- Clarify API documentation around catalog fetch behavior
### API
#### API
- Support returning HTTP 429 (Too Many Requests)
### Documentation
#### Documentation
- Update auth documentation examples to show "expires in" as int
### Docker Image
#### Docker Image
- Use Alpine Linux as base image

View file

@ -1,4 +1,4 @@
FROM golang:1.6-alpine
FROM golang:1.7-alpine
ENV DISTRIBUTION_DIR /go/src/github.com/docker/distribution
ENV DOCKER_BUILDTAGS include_oss include_gcs

View file

@ -13,7 +13,7 @@ endif
GO_LDFLAGS=-ldflags "-X `go list ./version`.Version=$(VERSION)"
.PHONY: clean all fmt vet lint build test binaries
.PHONY: all build binaries clean dep-restore dep-save dep-validate fmt lint test test-full vet
.DEFAULT: all
all: fmt vet lint build test binaries
@ -27,22 +27,25 @@ version/version.go:
# Required for go 1.5 to build
GO15VENDOREXPERIMENT := 1
# Go files
GOFILES=$(shell find . -type f -name '*.go')
# Package list
PKGS := $(shell go list -tags "${DOCKER_BUILDTAGS}" ./... | grep -v ^github.com/docker/distribution/vendor/)
PKGS=$(shell go list -tags "${DOCKER_BUILDTAGS}" ./... | grep -v ^github.com/docker/distribution/vendor/)
# Resolving binary dependencies for specific targets
GOLINT := $(shell which golint || echo '')
GODEP := $(shell which godep || echo '')
GOLINT=$(shell which golint || echo '')
GODEP=$(shell which godep || echo '')
${PREFIX}/bin/registry: $(wildcard **/*.go)
${PREFIX}/bin/registry: $(GOFILES)
@echo "+ $@"
@go build -tags "${DOCKER_BUILDTAGS}" -o $@ ${GO_LDFLAGS} ${GO_GCFLAGS} ./cmd/registry
${PREFIX}/bin/digest: $(wildcard **/*.go)
${PREFIX}/bin/digest: $(GOFILES)
@echo "+ $@"
@go build -tags "${DOCKER_BUILDTAGS}" -o $@ ${GO_LDFLAGS} ${GO_GCFLAGS} ./cmd/digest
${PREFIX}/bin/registry-api-descriptor-template: $(wildcard **/*.go)
${PREFIX}/bin/registry-api-descriptor-template: $(GOFILES)
@echo "+ $@"
@go build -o $@ ${GO_LDFLAGS} ${GO_GCFLAGS} ./cmd/registry-api-descriptor-template

View file

@ -19,7 +19,7 @@ This repository contains the following components:
| **registry** | An implementation of the [Docker Registry HTTP API V2](docs/spec/api.md) for use with docker 1.6+. |
| **libraries** | A rich set of libraries for interacting with distribution components. Please see [godoc](https://godoc.org/github.com/docker/distribution) for details. **Note**: These libraries are **unstable**. |
| **specifications** | _Distribution_ related specifications are available in [docs/spec](docs/spec) |
| **documentation** | Docker's full documentation set is available at [docs.docker.com](https://docs.docker.com). This repository [contains the subset](docs/index.md) related just to the registry. |
| **documentation** | Docker's full documentation set is available at [docs.docker.com](https://docs.docker.com). This repository [contains the subset](docs/) related just to the registry. |
### How does this integrate with Docker engine?
@ -68,7 +68,7 @@ others, it is not.
For example, users with their own software products may want to maintain a
registry for private, company images. Also, you may wish to deploy your own
image repository for images used to test or in continuous integration. For these
use cases and others, [deploying your own registry instance](docs/deploying.md)
use cases and others, [deploying your own registry instance](https://github.com/docker/docker.github.io/blob/master/registry/deploying.md)
may be the better choice.
### Migration to Registry 2.0
@ -83,7 +83,7 @@ created. For more information see [docker/migrator]
Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute
issues, fixes, and patches to this project. If you are contributing code, see
the instructions for [building a development environment](docs/recipes/building.md).
the instructions for [building a development environment](BUILDING.md).
## Support

View file

@ -0,0 +1,35 @@
## Registry Release Checklist
10. Compile release notes detailing features and since the last release. Update the `CHANGELOG.md` file.
20. Update the version file: `https://github.com/docker/distribution/blob/master/version/version.go`
30. Update the `MAINTAINERS` (if necessary), `AUTHORS` and `.mailmap` files.
```
make AUTHORS
```
40. Create a signed tag.
Distribution uses semantic versioning. Tags are of the format `vx.y.z[-rcn]`
You will need PGP installed and a PGP key which has been added to your Github account. The comment for the tag should include the release notes.
50. Push the signed tag
60. Create a new [release](https://github.com/docker/distribution/releases). In the case of a release candidate, tick the `pre-release` checkbox.
70. Update the registry binary in [distribution library image repo](https://github.com/docker/distribution-library-image) by running the update script and opening a pull request.
80. Update the official image. Add the new version in the [official images repo](https://github.com/docker-library/official-images) by appending a new version to the `registry/registry` file with the git hash pointed to by the signed tag. Update the major version to point to the latest version and the minor version to point to new patch release if necessary.
e.g. to release `2.3.1`
`2.3.1 (new)`
`2.3.0 -> 2.3.0` can be removed
`2 -> 2.3.1`
`2.3 -> 2.3.1`
90. Build a new distribution/registry image on [Docker hub](https://hub.docker.com/u/distribution/dashboard) by adding a new automated build with the new tag and re-building the images.

View file

@ -8,7 +8,7 @@ machine:
post:
# go
- gvm install go1.6 --prefer-binary --name=stable
- gvm install go1.7 --prefer-binary --name=stable
environment:
# Convenient shortcuts to "common" locations
@ -77,13 +77,16 @@ test:
timeout: 1000
pwd: $BASE_STABLE
# Test stable with race
- gvm use stable; export ROOT_PACKAGE=$(go list .); go list -tags "$DOCKER_BUILDTAGS" ./... | grep -v "/vendor/" | grep -v "registry/handlers" | grep -v "registry/storage/driver" | xargs -L 1 -I{} bash -c 'export PACKAGE={}; godep go test -race -tags "$DOCKER_BUILDTAGS" -test.short $PACKAGE':
timeout: 1000
pwd: $BASE_STABLE
post:
# Report to codecov
- bash <(curl -s https://codecov.io/bash):
pwd: $BASE_STABLE
## Notes
# Disabled the -race detector due to massive memory usage.
# Do we want these as well?
# - go get code.google.com/p/go.tools/cmd/goimports
# - test -z "$(goimports -l -w ./... | tee /dev/stderr)"

View file

@ -12,8 +12,13 @@ import (
// references and an optional target
type Manifest interface {
// References returns a list of objects which make up this manifest.
// The references are strictly ordered from base to head. A reference
// is anything which can be represented by a distribution.Descriptor
// A reference is anything which can be represented by a
// distribution.Descriptor. These can consist of layers, resources or other
// manifests.
//
// While no particular order is required, implementations should return
// them from highest to lowest priority. For example, one might want to
// return the base layer before the top layer.
References() []Descriptor
// Payload provides the serialized format of the manifest, in addition to
@ -36,6 +41,9 @@ type ManifestBuilder interface {
// AppendReference includes the given object in the manifest after any
// existing dependencies. If the add fails, such as when adding an
// unsupported dependency, an error may be returned.
//
// The destination of the reference is dependent on the manifest type and
// the dependency type.
AppendReference(dependency Describable) error
}

View file

@ -0,0 +1,12 @@
package reference
// IsNameOnly returns true if reference only contains a repo name.
func IsNameOnly(ref Named) bool {
if _, ok := ref.(NamedTagged); ok {
return false
}
if _, ok := ref.(Canonical); ok {
return false
}
return true
}

View file

@ -0,0 +1,22 @@
package reference
var (
defaultTag = "latest"
)
// EnsureTagged adds the default tag "latest" to a reference if it only has
// a repo name.
func EnsureTagged(ref Named) NamedTagged {
namedTagged, ok := ref.(NamedTagged)
if !ok {
namedTagged, err := WithTag(ref, defaultTag)
if err != nil {
// Default tag must be valid, to create a NamedTagged
// type with non-validated input the WithTag function
// should be used instead
panic(err)
}
return namedTagged
}
return namedTagged
}

View file

@ -24,6 +24,8 @@ package reference
import (
"errors"
"fmt"
"path"
"strings"
"github.com/docker/distribution/digest"
)
@ -43,6 +45,9 @@ var (
// ErrDigestInvalidFormat represents an error while trying to parse a string as a tag.
ErrDigestInvalidFormat = errors.New("invalid digest format")
// ErrNameContainsUppercase is returned for invalid repository names that contain uppercase characters.
ErrNameContainsUppercase = errors.New("repository name must be lowercase")
// ErrNameEmpty is returned for empty, invalid repository names.
ErrNameEmpty = errors.New("repository name must have at least one component")
@ -134,7 +139,7 @@ type Canonical interface {
func SplitHostname(named Named) (string, string) {
name := named.Name()
match := anchoredNameRegexp.FindStringSubmatch(name)
if match == nil || len(match) != 3 {
if len(match) != 3 {
return "", name
}
return match[1], match[2]
@ -149,7 +154,9 @@ func Parse(s string) (Reference, error) {
if s == "" {
return nil, ErrNameEmpty
}
// TODO(dmcgowan): Provide more specific and helpful error
if ReferenceRegexp.FindStringSubmatch(strings.ToLower(s)) != nil {
return nil, ErrNameContainsUppercase
}
return nil, ErrReferenceInvalidFormat
}
@ -212,6 +219,13 @@ func WithTag(name Named, tag string) (NamedTagged, error) {
if !anchoredTagRegexp.MatchString(tag) {
return nil, ErrTagInvalidFormat
}
if canonical, ok := name.(Canonical); ok {
return reference{
name: name.Name(),
tag: tag,
digest: canonical.Digest(),
}, nil
}
return taggedReference{
name: name.Name(),
tag: tag,
@ -224,12 +238,34 @@ func WithDigest(name Named, digest digest.Digest) (Canonical, error) {
if !anchoredDigestRegexp.MatchString(digest.String()) {
return nil, ErrDigestInvalidFormat
}
if tagged, ok := name.(Tagged); ok {
return reference{
name: name.Name(),
tag: tagged.Tag(),
digest: digest,
}, nil
}
return canonicalReference{
name: name.Name(),
digest: digest,
}, nil
}
// Match reports whether ref matches the specified pattern.
// See https://godoc.org/path#Match for supported patterns.
func Match(pattern string, ref Reference) (bool, error) {
matched, err := path.Match(pattern, ref.String())
if namedRef, isNamed := ref.(Named); isNamed && !matched {
matched, _ = path.Match(pattern, namedRef.Name())
}
return matched, err
}
// TrimNamed removes any tag or digest from the named reference.
func TrimNamed(ref Named) Named {
return repository(ref.Name())
}
func getBestReferenceType(ref reference) Reference {
if ref.name == "" {
// Allow digest only references

View file

@ -40,7 +40,7 @@ type Config struct {
AttachStdin bool // Attach the standard input, makes possible user interaction
AttachStdout bool // Attach the standard output
AttachStderr bool // Attach the standard error
ExposedPorts map[nat.Port]struct{} `json:",omitempty"` // List of exposed ports
ExposedPorts nat.PortSet `json:",omitempty"` // List of exposed ports
Tty bool // Attach standard streams to a tty, including stdin if it is not closed.
OpenStdin bool // Open stdin
StdinOnce bool // If true, close stdin after the 1 attached client disconnects.

View file

@ -0,0 +1,21 @@
package container
// ----------------------------------------------------------------------------
// DO NOT EDIT THIS FILE
// This file was generated by `swagger generate operation`
//
// See hack/generate-swagger-api.sh
// ----------------------------------------------------------------------------
// ContainerCreateCreatedBody container create created body
// swagger:model ContainerCreateCreatedBody
type ContainerCreateCreatedBody struct {
// The ID of the created container
// Required: true
ID string `json:"Id"`
// Warnings encountered when creating the container
// Required: true
Warnings []string `json:"Warnings"`
}

View file

@ -0,0 +1,17 @@
package container
// ----------------------------------------------------------------------------
// DO NOT EDIT THIS FILE
// This file was generated by `swagger generate operation`
//
// See hack/generate-swagger-api.sh
// ----------------------------------------------------------------------------
// ContainerUpdateOKBody container update o k body
// swagger:model ContainerUpdateOKBody
type ContainerUpdateOKBody struct {
// warnings
// Required: true
Warnings []string `json:"Warnings"`
}

View file

@ -0,0 +1,17 @@
package container
// ----------------------------------------------------------------------------
// DO NOT EDIT THIS FILE
// This file was generated by `swagger generate operation`
//
// See hack/generate-swagger-api.sh
// ----------------------------------------------------------------------------
// ContainerWaitOKBody container wait o k body
// swagger:model ContainerWaitOKBody
type ContainerWaitOKBody struct {
// Exit code of the container
// Required: true
StatusCode int64 `json:"StatusCode"`
}

View file

@ -234,6 +234,7 @@ type Resources struct {
// Applicable to all platforms
CPUShares int64 `json:"CpuShares"` // CPU shares (relative weight vs. other containers)
Memory int64 // Memory limit (in bytes)
NanoCPUs int64 `json:"NanoCpus"` // CPU quota in units of 10<sup>-9</sup> CPUs.
// Applicable to UNIX platforms
CgroupParent string // Parent cgroup.
@ -243,8 +244,10 @@ type Resources struct {
BlkioDeviceWriteBps []*blkiodev.ThrottleDevice
BlkioDeviceReadIOps []*blkiodev.ThrottleDevice
BlkioDeviceWriteIOps []*blkiodev.ThrottleDevice
CPUPeriod int64 `json:"CpuPeriod"` // CPU CFS (Completely Fair Scheduler) period
CPUQuota int64 `json:"CpuQuota"` // CPU CFS (Completely Fair Scheduler) quota
CPUPeriod int64 `json:"CpuPeriod"` // CPU CFS (Completely Fair Scheduler) period
CPUQuota int64 `json:"CpuQuota"` // CPU CFS (Completely Fair Scheduler) quota
CPURealtimePeriod int64 `json:"CpuRealtimePeriod"` // CPU real-time period
CPURealtimeRuntime int64 `json:"CpuRealtimeRuntime"` // CPU real-time runtime
CpusetCpus string // CpusetCpus 0-2, 0,1
CpusetMems string // CpusetMems 0-2, 0,1
Devices []DeviceMapping // List of devices to map inside the container
@ -324,4 +327,7 @@ type HostConfig struct {
// Run a custom init inside the container, if null, use the daemon's configured settings
Init *bool `json:",omitempty"`
// Custom init path
InitPath string `json:",omitempty"`
}

View file

@ -1,24 +1,35 @@
package mount
import (
"os"
)
// Type represents the type of a mount.
type Type string
// Type constants
const (
// TypeBind BIND
// TypeBind is the type for mounting host dir
TypeBind Type = "bind"
// TypeVolume VOLUME
// TypeVolume is the type for remote storage volumes
TypeVolume Type = "volume"
// TypeTmpfs is the type for mounting tmpfs
TypeTmpfs Type = "tmpfs"
)
// Mount represents a mount (volume).
type Mount struct {
Type Type `json:",omitempty"`
Type Type `json:",omitempty"`
// Source specifies the name of the mount. Depending on mount type, this
// may be a volume name or a host path, or even ignored.
// Source is not supported for tmpfs (must be an empty value)
Source string `json:",omitempty"`
Target string `json:",omitempty"`
ReadOnly bool `json:",omitempty"`
BindOptions *BindOptions `json:",omitempty"`
VolumeOptions *VolumeOptions `json:",omitempty"`
TmpfsOptions *TmpfsOptions `json:",omitempty"`
}
// Propagation represents the propagation of a mount.
@ -56,3 +67,37 @@ type Driver struct {
Name string `json:",omitempty"`
Options map[string]string `json:",omitempty"`
}
// TmpfsOptions defines options specific to mounts of type "tmpfs".
type TmpfsOptions struct {
// Size sets the size of the tmpfs, in bytes.
//
// This will be converted to an operating system specific value
// depending on the host. For example, on linux, it will be convered to
// use a 'k', 'm' or 'g' syntax. BSD, though not widely supported with
// docker, uses a straight byte value.
//
// Percentages are not supported.
SizeBytes int64 `json:",omitempty"`
// Mode of the tmpfs upon creation
Mode os.FileMode `json:",omitempty"`
// TODO(stevvooe): There are several more tmpfs flags, specified in the
// daemon, that are accepted. Only the most basic are added for now.
//
// From docker/docker/pkg/mount/flags.go:
//
// var validFlags = map[string]bool{
// "": true,
// "size": true, X
// "mode": true, X
// "uid": true,
// "gid": true,
// "nr_inodes": true,
// "nr_blocks": true,
// "mpol": true,
// }
//
// Some of these may be straightforward to add, but others, such as
// uid/gid have implications in a clustered system.
}

View file

@ -1,10 +1,10 @@
## Legacy API type versions
# Legacy API type versions
This package includes types for legacy API versions. The stable version of the API types live in `api/types/*.go`.
Consider moving a type here when you need to keep backwards compatibility in the API. This legacy types are organized by the latest API version they appear in. For instance, types in the `v1p19` package are valid for API versions below or equal `1.19`. Types in the `v1p20` package are valid for the API version `1.20`, since the versions below that will use the legacy types in `v1p19`.
### Package name conventions
## Package name conventions
The package name convention is to use `v` as a prefix for the version number and `p`(patch) as a separator. We use this nomenclature due to a few restrictions in the Go package name convention:

View file

@ -3,6 +3,7 @@ package graphdriver
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
@ -12,7 +13,7 @@ import (
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/plugin/getter"
"github.com/docker/docker/pkg/plugingetter"
)
// FsMagic unsigned id of the filesystem in use.
@ -35,6 +36,13 @@ var (
ErrIncompatibleFS = fmt.Errorf("backing file system is unsupported for this graph driver")
)
//CreateOpts contains optional arguments for Create() and CreateReadWrite()
// methods.
type CreateOpts struct {
MountLabel string
StorageOpt map[string]string
}
// InitFunc initializes the storage driver.
type InitFunc func(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error)
@ -48,11 +56,13 @@ type ProtoDriver interface {
// String returns a string representation of this driver.
String() string
// CreateReadWrite creates a new, empty filesystem layer that is ready
// to be used as the storage for a container.
CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error
// to be used as the storage for a container. Additional options can
// be passed in opts. parent may be "" and opts may be nil.
CreateReadWrite(id, parent string, opts *CreateOpts) error
// Create creates a new, empty, filesystem layer with the
// specified id and parent and mountLabel. Parent and mountLabel may be "".
Create(id, parent, mountLabel string, storageOpt map[string]string) error
// specified id and parent and options passed in opts. Parent
// may be "" and opts may be nil.
Create(id, parent string, opts *CreateOpts) error
// Remove attempts to remove the filesystem layer with this id.
Remove(id string) error
// Get returns the mountpoint for the layered filesystem referred
@ -77,12 +87,11 @@ type ProtoDriver interface {
Cleanup() error
}
// Driver is the interface for layered/snapshot file system drivers.
type Driver interface {
ProtoDriver
// DiffDriver is the interface to use to implement graph diffs
type DiffDriver interface {
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
Diff(id, parent string) (archive.Archive, error)
Diff(id, parent string) (io.ReadCloser, error)
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
Changes(id, parent string) ([]archive.Change, error)
@ -90,13 +99,19 @@ type Driver interface {
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
// The archive.Reader must be an uncompressed stream.
ApplyDiff(id, parent string, diff archive.Reader) (size int64, err error)
ApplyDiff(id, parent string, diff io.Reader) (size int64, err error)
// DiffSize calculates the changes between the specified id
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
DiffSize(id, parent string) (size int64, err error)
}
// Driver is the interface for layered/snapshot file system drivers.
type Driver interface {
ProtoDriver
DiffDriver
}
// DiffGetterDriver is the interface for layered file system drivers that
// provide a specialized function for getting file contents for tar-split.
type DiffGetterDriver interface {
@ -135,11 +150,11 @@ func Register(name string, initFunc InitFunc) error {
}
// GetDriver initializes and returns the registered driver
func GetDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap, plugingetter getter.PluginGetter) (Driver, error) {
func GetDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap, pg plugingetter.PluginGetter) (Driver, error) {
if initFunc, exists := drivers[name]; exists {
return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
}
if pluginDriver, err := lookupPlugin(name, home, options, plugingetter); err == nil {
if pluginDriver, err := lookupPlugin(name, home, options, pg); err == nil {
return pluginDriver, nil
}
logrus.Errorf("Failed to GetDriver graph %s %s", name, home)
@ -156,10 +171,10 @@ func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []id
}
// New creates the driver and initializes it at the specified root.
func New(root string, name string, options []string, uidMaps, gidMaps []idtools.IDMap, plugingetter getter.PluginGetter) (Driver, error) {
func New(root, name string, options []string, uidMaps, gidMaps []idtools.IDMap, pg plugingetter.PluginGetter) (Driver, error) {
if name != "" {
logrus.Debugf("[graphdriver] trying provided driver: %s", name) // so the logs show specified driver
return GetDriver(name, root, options, uidMaps, gidMaps, plugingetter)
return GetDriver(name, root, options, uidMaps, gidMaps, pg)
}
// Guess for prior driver

View file

@ -56,8 +56,9 @@ var (
"aufs",
"btrfs",
"zfs",
"devicemapper",
"overlay2",
"overlay",
"devicemapper",
"vfs",
}

View file

@ -20,6 +20,7 @@ import (
"unsafe"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/mount"
)
const (
@ -44,6 +45,37 @@ func GetFSMagic(rootpath string) (FsMagic, error) {
return 0, nil
}
type fsChecker struct {
t FsMagic
}
func (c *fsChecker) IsMounted(path string) bool {
m, _ := Mounted(c.t, path)
return m
}
// NewFsChecker returns a checker configured for the provied FsMagic
func NewFsChecker(t FsMagic) Checker {
return &fsChecker{
t: t,
}
}
// NewDefaultChecker returns a check that parses /proc/mountinfo to check
// if the specified path is mounted.
// No-op on Solaris.
func NewDefaultChecker() Checker {
return &defaultChecker{}
}
type defaultChecker struct {
}
func (c *defaultChecker) IsMounted(path string) bool {
m, _ := mount.Mounted(path)
return m
}
// Mounted checks if the given path is mounted as the fs type
//Solaris supports only ZFS for now
func Mounted(fsType FsMagic, mountPath string) (bool, error) {

View file

@ -1,10 +1,10 @@
package graphdriver
import (
"io"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/chrootarchive"
"github.com/docker/docker/pkg/idtools"
@ -43,7 +43,8 @@ func NewNaiveDiffDriver(driver ProtoDriver, uidMaps, gidMaps []idtools.IDMap) Dr
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch archive.Archive, err error) {
func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch io.ReadCloser, err error) {
startTime := time.Now()
driver := gdw.ProtoDriver
layerFs, err := driver.Get(id, "")
@ -88,6 +89,12 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch archive.Archive, err e
return ioutils.NewReadCloserWrapper(archive, func() error {
err := archive.Close()
driver.Put(id)
// NaiveDiffDriver compares file metadata with parent layers. Parent layers
// are extracted from tar's with full second precision on modified time.
// We need this hack here to make sure calls within same second receive
// correct result.
time.Sleep(startTime.Truncate(time.Second).Add(time.Second).Sub(time.Now()))
return err
}), nil
}
@ -119,7 +126,7 @@ func (gdw *NaiveDiffDriver) Changes(id, parent string) ([]archive.Change, error)
// ApplyDiff extracts the changeset from the given diff into the
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff archive.Reader) (size int64, err error) {
func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err error) {
driver := gdw.ProtoDriver
// Mount the root filesystem so we can apply the diff/layer.

View file

@ -1,12 +1,11 @@
// +build experimental
package graphdriver
import (
"fmt"
"io"
"path/filepath"
"github.com/docker/docker/plugin/getter"
"github.com/docker/docker/pkg/plugingetter"
)
type pluginClient interface {
@ -18,8 +17,8 @@ type pluginClient interface {
SendFile(string, io.Reader, interface{}) error
}
func lookupPlugin(name, home string, opts []string, pluginGetter getter.PluginGetter) (Driver, error) {
pl, err := pluginGetter.Get(name, "GraphDriver", getter.LOOKUP)
func lookupPlugin(name, home string, opts []string, pg plugingetter.PluginGetter) (Driver, error) {
pl, err := pg.Get(name, "GraphDriver", plugingetter.LOOKUP)
if err != nil {
return nil, fmt.Errorf("Error looking up graphdriver plugin %s: %v", name, err)
}
@ -28,5 +27,5 @@ func lookupPlugin(name, home string, opts []string, pluginGetter getter.PluginGe
func newPluginDriver(name, home string, opts []string, c pluginClient) (Driver, error) {
proxy := &graphDriverProxy{name, c}
return proxy, proxy.Init(home, opts)
return proxy, proxy.Init(filepath.Join(home, name), opts)
}

View file

@ -1,9 +0,0 @@
// +build !experimental
package graphdriver
import "github.com/docker/docker/plugin/getter"
func lookupPlugin(name, home string, opts []string, plugingetter getter.PluginGetter) (Driver, error) {
return nil, ErrNotSupported
}

View file

@ -1,10 +1,9 @@
// +build experimental
package graphdriver
import (
"errors"
"fmt"
"io"
"github.com/docker/docker/pkg/archive"
)
@ -54,7 +53,12 @@ func (d *graphDriverProxy) String() string {
return d.name
}
func (d *graphDriverProxy) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
func (d *graphDriverProxy) CreateReadWrite(id, parent string, opts *CreateOpts) error {
mountLabel := ""
if opts != nil {
mountLabel = opts.MountLabel
}
args := &graphDriverRequest{
ID: id,
Parent: parent,
@ -70,7 +74,11 @@ func (d *graphDriverProxy) CreateReadWrite(id, parent, mountLabel string, storag
return nil
}
func (d *graphDriverProxy) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
func (d *graphDriverProxy) Create(id, parent string, opts *CreateOpts) error {
mountLabel := ""
if opts != nil {
mountLabel = opts.MountLabel
}
args := &graphDriverRequest{
ID: id,
Parent: parent,
@ -170,7 +178,7 @@ func (d *graphDriverProxy) Cleanup() error {
return nil
}
func (d *graphDriverProxy) Diff(id, parent string) (archive.Archive, error) {
func (d *graphDriverProxy) Diff(id, parent string) (io.ReadCloser, error) {
args := &graphDriverRequest{
ID: id,
Parent: parent,
@ -179,7 +187,7 @@ func (d *graphDriverProxy) Diff(id, parent string) (archive.Archive, error) {
if err != nil {
return nil, err
}
return archive.Archive(body), nil
return body, nil
}
func (d *graphDriverProxy) Changes(id, parent string) ([]archive.Change, error) {
@ -198,7 +206,7 @@ func (d *graphDriverProxy) Changes(id, parent string) ([]archive.Change, error)
return ret.Changes, nil
}
func (d *graphDriverProxy) ApplyDiff(id, parent string, diff archive.Reader) (int64, error) {
func (d *graphDriverProxy) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
var ret graphDriverResponse
if err := d.client.SendFile(fmt.Sprintf("GraphDriver.ApplyDiff?id=%s&parent=%s", id, parent), diff, &ret); err != nil {
return -1, err

View file

@ -3,6 +3,7 @@ package layer
import (
"archive/tar"
"bytes"
"fmt"
"io"
"io/ioutil"
)
@ -23,6 +24,13 @@ func (el *emptyLayer) TarStream() (io.ReadCloser, error) {
return ioutil.NopCloser(buf), nil
}
func (el *emptyLayer) TarStreamFrom(p ChainID) (io.ReadCloser, error) {
if p == "" {
return el.TarStream()
}
return nil, fmt.Errorf("can't get parent tar stream of an empty layer")
}
func (el *emptyLayer) ChainID() ChainID {
return ChainID(DigestSHA256EmptyTar)
}

View file

@ -84,6 +84,10 @@ type TarStreamer interface {
type Layer interface {
TarStreamer
// TarStreamFrom returns a tar archive stream for all the layer chain with
// arbitrary depth.
TarStreamFrom(ChainID) (io.ReadCloser, error)
// ChainID returns the content hash of the entire layer chain. The hash
// chain is made up of DiffID of top layer and all of its parents.
ChainID() ChainID
@ -165,14 +169,22 @@ type Metadata struct {
// RWLayer.
type MountInit func(root string) error
// CreateRWLayerOpts contains optional arguments to be passed to CreateRWLayer
type CreateRWLayerOpts struct {
MountLabel string
InitFunc MountInit
StorageOpt map[string]string
}
// Store represents a backend for managing both
// read-only and read-write layers.
type Store interface {
Register(io.Reader, ChainID) (Layer, error)
Get(ChainID) (Layer, error)
Map() map[ChainID]Layer
Release(Layer) ([]Metadata, error)
CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit, storageOpt map[string]string) (RWLayer, error)
CreateRWLayer(id string, parent ChainID, opts *CreateRWLayerOpts) (RWLayer, error)
GetRWLayer(id string) (RWLayer, error)
GetMountID(id string) (string, error)
ReleaseRWLayer(RWLayer) ([]Metadata, error)

View file

@ -11,10 +11,9 @@ import (
"github.com/docker/distribution"
"github.com/docker/distribution/digest"
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/plugin/getter"
"github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage"
)
@ -45,7 +44,7 @@ type StoreOptions struct {
GraphDriverOptions []string
UIDMaps []idtools.IDMap
GIDMaps []idtools.IDMap
PluginGetter getter.PluginGetter
PluginGetter plugingetter.PluginGetter
}
// NewStoreFromOptions creates a new Store instance
@ -221,7 +220,7 @@ func (ls *layerStore) applyTar(tx MetadataTransaction, ts io.Reader, parent stri
return err
}
applySize, err := ls.driver.ApplyDiff(layer.cacheID, parent, archive.Reader(rdr))
applySize, err := ls.driver.ApplyDiff(layer.cacheID, parent, rdr)
if err != nil {
return err
}
@ -278,7 +277,7 @@ func (ls *layerStore) registerWithDescriptor(ts io.Reader, parent ChainID, descr
descriptor: descriptor,
}
if err = ls.driver.Create(layer.cacheID, pid, "", nil); err != nil {
if err = ls.driver.Create(layer.cacheID, pid, nil); err != nil {
return nil, err
}
@ -360,6 +359,19 @@ func (ls *layerStore) Get(l ChainID) (Layer, error) {
return layer.getReference(), nil
}
func (ls *layerStore) Map() map[ChainID]Layer {
ls.layerL.Lock()
defer ls.layerL.Unlock()
layers := map[ChainID]Layer{}
for k, v := range ls.layerMap {
layers[k] = v
}
return layers
}
func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error {
err := ls.driver.Remove(layer.cacheID)
if err != nil {
@ -432,7 +444,19 @@ func (ls *layerStore) Release(l Layer) ([]Metadata, error) {
return ls.releaseLayer(layer)
}
func (ls *layerStore) CreateRWLayer(name string, parent ChainID, mountLabel string, initFunc MountInit, storageOpt map[string]string) (RWLayer, error) {
func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWLayerOpts) (RWLayer, error) {
var (
storageOpt map[string]string
initFunc MountInit
mountLabel string
)
if opts != nil {
mountLabel = opts.MountLabel
storageOpt = opts.StorageOpt
initFunc = opts.InitFunc
}
ls.mountL.Lock()
defer ls.mountL.Unlock()
m, ok := ls.mounts[name]
@ -476,7 +500,11 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, mountLabel stri
m.initID = pid
}
if err = ls.driver.CreateReadWrite(m.mountID, pid, "", storageOpt); err != nil {
createOpts := &graphdriver.CreateOpts{
StorageOpt: storageOpt,
}
if err = ls.driver.CreateReadWrite(m.mountID, pid, createOpts); err != nil {
return nil, err
}
@ -586,7 +614,12 @@ func (ls *layerStore) initMount(graphID, parent, mountLabel string, initFunc Mou
// then the initID should be randomly generated.
initID := fmt.Sprintf("%s-init", graphID)
if err := ls.driver.CreateReadWrite(initID, parent, mountLabel, storageOpt); err != nil {
createOpts := &graphdriver.CreateOpts{
MountLabel: mountLabel,
StorageOpt: storageOpt,
}
if err := ls.driver.CreateReadWrite(initID, parent, createOpts); err != nil {
return "", err
}
p, err := ls.driver.Get(initID, "")

View file

@ -28,11 +28,7 @@ func (ml *mountedLayer) cacheParent() string {
}
func (ml *mountedLayer) TarStream() (io.ReadCloser, error) {
archiver, err := ml.layerStore.driver.Diff(ml.mountID, ml.cacheParent())
if err != nil {
return nil, err
}
return archiver, nil
return ml.layerStore.driver.Diff(ml.mountID, ml.cacheParent())
}
func (ml *mountedLayer) Name() string {

View file

@ -21,6 +21,8 @@ type roLayer struct {
references map[Layer]struct{}
}
// TarStream for roLayer guarentees that the data that is produced is the exact
// data that the layer was registered with.
func (rl *roLayer) TarStream() (io.ReadCloser, error) {
r, err := rl.layerStore.store.TarSplitReader(rl.chainID)
if err != nil {
@ -43,6 +45,24 @@ func (rl *roLayer) TarStream() (io.ReadCloser, error) {
return rc, nil
}
// TarStreamFrom does not make any guarentees to the correctness of the produced
// data. As such it should not be used when the layer content must be verified
// to be an exact match to the registered layer.
func (rl *roLayer) TarStreamFrom(parent ChainID) (io.ReadCloser, error) {
var parentCacheID string
for pl := rl.parent; pl != nil; pl = pl.parent {
if pl.chainID == parent {
parentCacheID = pl.cacheID
break
}
}
if parent != ChainID("") && parentCacheID == "" {
return nil, fmt.Errorf("layer ID '%s' is not a parent of the specified layer: cannot provide diff to non-parent", parent)
}
return rl.layerStore.driver.Diff(rl.cacheID, parentCacheID)
}
func (rl *roLayer) ChainID() ChainID {
return rl.chainID
}

View file

@ -27,10 +27,6 @@ import (
)
type (
// Archive is a type of io.ReadCloser which has two interfaces Read and Closer.
Archive io.ReadCloser
// Reader is a type of io.Reader.
Reader io.Reader
// Compression is the state represents if compressed or not.
Compression int
// WhiteoutFormat is the format of whiteouts unpacked
@ -39,6 +35,7 @@ type (
TarChownOptions struct {
UID, GID int
}
// TarOptions wraps the tar options.
TarOptions struct {
IncludeFiles []string
@ -244,7 +241,7 @@ func (compression *Compression) Extension() string {
}
type tarWhiteoutConverter interface {
ConvertWrite(*tar.Header, string, os.FileInfo) error
ConvertWrite(*tar.Header, string, os.FileInfo) (*tar.Header, error)
ConvertRead(*tar.Header, string) (bool, error)
}
@ -311,7 +308,7 @@ func (ta *tarAppender) addTarFile(path, name string) error {
}
// if it's not a directory and has more than 1 link,
// it's hardlinked, so set the type flag accordingly
// it's hard linked, so set the type flag accordingly
if !fi.IsDir() && hasHardlinks(fi) {
// a link should have a name that it links too
// and that linked name should be first in the tar archive
@ -351,9 +348,25 @@ func (ta *tarAppender) addTarFile(path, name string) error {
}
if ta.WhiteoutConverter != nil {
if err := ta.WhiteoutConverter.ConvertWrite(hdr, path, fi); err != nil {
wo, err := ta.WhiteoutConverter.ConvertWrite(hdr, path, fi)
if err != nil {
return err
}
// If a new whiteout file exists, write original hdr, then
// replace hdr with wo to be written after. Whiteouts should
// always be written after the original. Note the original
// hdr may have been updated to be a whiteout with returning
// a whiteout header
if wo != nil {
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
return err
}
if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 {
return fmt.Errorf("tar: cannot use whiteout for non-empty file")
}
hdr = wo
}
}
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
@ -361,7 +374,10 @@ func (ta *tarAppender) addTarFile(path, name string) error {
}
if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 {
file, err := os.Open(path)
// We use system.OpenSequential to ensure we use sequential file
// access on Windows to avoid depleting the standby list.
// On Linux, this equates to a regular os.Open.
file, err := system.OpenSequential(path)
if err != nil {
return err
}
@ -399,8 +415,10 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
case tar.TypeReg, tar.TypeRegA:
// Source is regular file
file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, hdrInfo.Mode())
// Source is regular file. We use system.OpenFileSequential to use sequential
// file access to avoid depleting the standby list on Windows.
// On Linux, this equates to a regular os.OpenFile
file, err := system.OpenFileSequential(path, os.O_CREATE|os.O_WRONLY, hdrInfo.Mode())
if err != nil {
return err
}
@ -1050,7 +1068,7 @@ func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
return nil
})
defer func() {
if er := <-errC; err != nil {
if er := <-errC; err == nil && er != nil {
err = er
}
}()
@ -1106,7 +1124,7 @@ func cmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, <-chan struct{},
// NewTempArchive reads the content of src into a temporary file, and returns the contents
// of that file as an archive. The archive can only be read once - as soon as reading completes,
// the file will be deleted.
func NewTempArchive(src Archive, dir string) (*TempArchive, error) {
func NewTempArchive(src io.Reader, dir string) (*TempArchive, error) {
f, err := ioutil.TempFile(dir, "")
if err != nil {
return nil, err

View file

@ -19,7 +19,7 @@ func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter {
type overlayWhiteoutConverter struct{}
func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) error {
func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) {
// convert whiteouts to AUFS format
if fi.Mode()&os.ModeCharDevice != 0 && hdr.Devmajor == 0 && hdr.Devminor == 0 {
// we just rename the file and make it normal
@ -34,12 +34,16 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os
// convert opaque dirs to AUFS format by writing an empty file with the prefix
opaque, err := system.Lgetxattr(path, "trusted.overlay.opaque")
if err != nil {
return err
return nil, err
}
if opaque != nil && len(opaque) == 1 && opaque[0] == 'y' {
if len(opaque) == 1 && opaque[0] == 'y' {
if hdr.Xattrs != nil {
delete(hdr.Xattrs, "trusted.overlay.opaque")
}
// create a header for the whiteout file
// it should inherit some properties from the parent, but be a regular file
*hdr = tar.Header{
wo = &tar.Header{
Typeflag: tar.TypeReg,
Mode: hdr.Mode & int64(os.ModePerm),
Name: filepath.Join(hdr.Name, WhiteoutOpaqueDir),
@ -54,7 +58,7 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os
}
}
return nil
return
}
func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) {

View file

@ -391,7 +391,7 @@ func ChangesSize(newDir string, changes []Change) int64 {
}
// ExportChanges produces an Archive from the provided changes, relative to dir.
func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (Archive, error) {
func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
reader, writer := io.Pipe()
go func() {
ta := &tarAppender{

View file

@ -302,7 +302,7 @@ func overlayDeletedFile(root, path string, fi os.FileInfo) (string, error) {
if err != nil {
return "", err
}
if opaque != nil && len(opaque) == 1 && opaque[0] == 'y' {
if len(opaque) == 1 && opaque[0] == 'y' {
return path, nil
}
}

View file

@ -88,13 +88,13 @@ func SplitPathDirEntry(path string) (dir, base string) {
// This function acts as a convenient wrapper around TarWithOptions, which
// requires a directory as the source path. TarResource accepts either a
// directory or a file path and correctly sets the Tar options.
func TarResource(sourceInfo CopyInfo) (content Archive, err error) {
func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
}
// TarResourceRebase is like TarResource but renames the first path element of
// items in the resulting tar archive to match the given rebaseName if not "".
func TarResourceRebase(sourcePath, rebaseName string) (content Archive, err error) {
func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
sourcePath = normalizePath(sourcePath)
if _, err = os.Lstat(sourcePath); err != nil {
// Catches the case where the source does not exist or is not a
@ -241,7 +241,7 @@ func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
// contain the archived resource described by srcInfo, to the destination
// described by dstInfo. Returns the possibly modified content archive along
// with the path to the destination directory which it should be extracted to.
func PrepareArchiveCopy(srcContent Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content Archive, err error) {
func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
// Ensure in platform semantics
srcInfo.Path = normalizePath(srcInfo.Path)
dstInfo.Path = normalizePath(dstInfo.Path)
@ -304,7 +304,7 @@ func PrepareArchiveCopy(srcContent Reader, srcInfo, dstInfo CopyInfo) (dstDir st
// RebaseArchiveEntries rewrites the given srcContent archive replacing
// an occurrence of oldBase with newBase at the beginning of entry names.
func RebaseArchiveEntries(srcContent Reader, oldBase, newBase string) Archive {
func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
if oldBase == string(os.PathSeparator) {
// If oldBase specifies the root directory, use an empty string as
// oldBase instead so that newBase doesn't replace the path separator
@ -380,7 +380,7 @@ func CopyResource(srcPath, dstPath string, followLink bool) error {
// CopyTo handles extracting the given content whose
// entries should be sourced from srcInfo to dstPath.
func CopyTo(content Reader, srcInfo CopyInfo, dstPath string) error {
func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
// The destination path need not exist, but CopyInfoDestinationPath will
// ensure that at least the parent directory exists.
dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))

View file

@ -19,7 +19,7 @@ import (
// UnpackLayer unpack `layer` to a `dest`. The stream `layer` can be
// compressed or uncompressed.
// Returns the size in bytes of the contents of the layer.
func UnpackLayer(dest string, layer Reader, options *TarOptions) (size int64, err error) {
func UnpackLayer(dest string, layer io.Reader, options *TarOptions) (size int64, err error) {
tr := tar.NewReader(layer)
trBuf := pools.BufioReader32KPool.Get(tr)
defer pools.BufioReader32KPool.Put(trBuf)
@ -246,7 +246,7 @@ func UnpackLayer(dest string, layer Reader, options *TarOptions) (size int64, er
// and applies it to the directory `dest`. The stream `layer` can be
// compressed or uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyLayer(dest string, layer Reader) (int64, error) {
func ApplyLayer(dest string, layer io.Reader) (int64, error) {
return applyLayerHandler(dest, layer, &TarOptions{}, true)
}
@ -254,12 +254,12 @@ func ApplyLayer(dest string, layer Reader) (int64, error) {
// `layer`, and applies it to the directory `dest`. The stream `layer`
// can only be uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyUncompressedLayer(dest string, layer Reader, options *TarOptions) (int64, error) {
func ApplyUncompressedLayer(dest string, layer io.Reader, options *TarOptions) (int64, error) {
return applyLayerHandler(dest, layer, options, false)
}
// do the bulk load of ApplyLayer, but allow for not calling DecompressStream
func applyLayerHandler(dest string, layer Reader, options *TarOptions, decompress bool) (int64, error) {
func applyLayerHandler(dest string, layer io.Reader, options *TarOptions, decompress bool) (int64, error) {
dest = filepath.Clean(dest)
// We need to be able to set any perms

View file

@ -3,7 +3,7 @@ package archive
import (
"archive/tar"
"bytes"
"io/ioutil"
"io"
)
// Generate generates a new archive from the content provided
@ -22,7 +22,7 @@ import (
//
// FIXME: stream content instead of buffering
// FIXME: specify permissions and other archive metadata
func Generate(input ...string) (Archive, error) {
func Generate(input ...string) (io.Reader, error) {
files := parseStringPairs(input...)
buf := new(bytes.Buffer)
tw := tar.NewWriter(buf)
@ -42,7 +42,7 @@ func Generate(input ...string) (Archive, error) {
if err := tw.Close(); err != nil {
return nil, err
}
return ioutil.NopCloser(buf), nil
return buf, nil
}
func parseStringPairs(input ...string) (output [][2]string) {

View file

@ -26,10 +26,17 @@ func chroot(path string) (err error) {
return fmt.Errorf("Error creating mount namespace before pivot: %v", err)
}
if err := mount.MakeRPrivate(path); err != nil {
// make everything in new ns private
if err := mount.MakeRPrivate("/"); err != nil {
return err
}
if mounted, _ := mount.Mounted(path); !mounted {
if err := mount.Mount(path, path, "bind", "rbind,rw"); err != nil {
return realChroot(path)
}
}
// setup oldRoot for pivot_root
pivotDir, err := ioutil.TempDir(path, ".pivot_root")
if err != nil {
@ -57,13 +64,6 @@ func chroot(path string) (err error) {
err = errCleanup
}
}
if errCleanup := syscall.Unmount("/", syscall.MNT_DETACH); errCleanup != nil {
if err == nil {
err = fmt.Errorf("error unmounting root: %v", errCleanup)
}
return
}
}()
if err := syscall.PivotRoot(path, pivotDir); err != nil {

View file

@ -1,12 +1,16 @@
package chrootarchive
import "github.com/docker/docker/pkg/archive"
import (
"io"
"github.com/docker/docker/pkg/archive"
)
// ApplyLayer parses a diff in the standard layer format from `layer`,
// and applies it to the directory `dest`. The stream `layer` can only be
// uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyLayer(dest string, layer archive.Reader) (size int64, err error) {
func ApplyLayer(dest string, layer io.Reader) (size int64, err error) {
return applyLayerHandler(dest, layer, &archive.TarOptions{}, true)
}
@ -14,6 +18,6 @@ func ApplyLayer(dest string, layer archive.Reader) (size int64, err error) {
// `layer`, and applies it to the directory `dest`. The stream `layer`
// can only be uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyUncompressedLayer(dest string, layer archive.Reader, options *archive.TarOptions) (int64, error) {
func ApplyUncompressedLayer(dest string, layer io.Reader, options *archive.TarOptions) (int64, error) {
return applyLayerHandler(dest, layer, options, false)
}

View file

@ -7,6 +7,7 @@ import (
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -81,7 +82,7 @@ func applyLayer() {
// applyLayerHandler parses a diff in the standard layer format from `layer`, and
// applies it to the directory `dest`. Returns the size in bytes of the
// contents of the layer.
func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
dest = filepath.Clean(dest)
if decompress {
decompressed, err := archive.DecompressStream(layer)

View file

@ -2,6 +2,7 @@ package chrootarchive
import (
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -13,7 +14,7 @@ import (
// applyLayerHandler parses a diff in the standard layer format from `layer`, and
// applies it to the directory `dest`. Returns the size in bytes of the
// contents of the layer.
func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
dest = filepath.Clean(dest)
// Ensure it is a Windows-style volume path

View file

@ -180,7 +180,7 @@ func regexpMatch(pattern, path string) (bool, error) {
} else if ch == '?' {
// "?" is any char except "/"
regStr += "[^" + escSL + "]"
} else if strings.Index(".$", string(ch)) != -1 {
} else if ch == '.' || ch == '$' {
// Escape some regexp special chars that have no meaning
// in golang's filepath.Match
regStr += `\` + string(ch)

View file

@ -3,10 +3,21 @@
package idtools
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"github.com/docker/docker/pkg/system"
"github.com/opencontainers/runc/libcontainer/user"
)
var (
entOnce sync.Once
getentCmd string
)
func mkdirAs(path string, mode os.FileMode, ownerUID, ownerGID int, mkAll, chownExisting bool) error {
@ -84,3 +95,113 @@ func accessible(isOwner, isGroup bool, perms os.FileMode) bool {
}
return false
}
// LookupUser uses traditional local system files lookup (from libcontainer/user) on a username,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupUser(username string) (user.User, error) {
// first try a local system files lookup using existing capabilities
usr, err := user.LookupUser(username)
if err == nil {
return usr, nil
}
// local files lookup failed; attempt to call `getent` to query configured passwd dbs
usr, err = getentUser(fmt.Sprintf("%s %s", "passwd", username))
if err != nil {
return user.User{}, err
}
return usr, nil
}
// LookupUID uses traditional local system files lookup (from libcontainer/user) on a uid,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupUID(uid int) (user.User, error) {
// first try a local system files lookup using existing capabilities
usr, err := user.LookupUid(uid)
if err == nil {
return usr, nil
}
// local files lookup failed; attempt to call `getent` to query configured passwd dbs
return getentUser(fmt.Sprintf("%s %d", "passwd", uid))
}
func getentUser(args string) (user.User, error) {
reader, err := callGetent(args)
if err != nil {
return user.User{}, err
}
users, err := user.ParsePasswd(reader)
if err != nil {
return user.User{}, err
}
if len(users) == 0 {
return user.User{}, fmt.Errorf("getent failed to find passwd entry for %q", strings.Split(args, " ")[1])
}
return users[0], nil
}
// LookupGroup uses traditional local system files lookup (from libcontainer/user) on a group name,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupGroup(groupname string) (user.Group, error) {
// first try a local system files lookup using existing capabilities
group, err := user.LookupGroup(groupname)
if err == nil {
return group, nil
}
// local files lookup failed; attempt to call `getent` to query configured group dbs
return getentGroup(fmt.Sprintf("%s %s", "group", groupname))
}
// LookupGID uses traditional local system files lookup (from libcontainer/user) on a group ID,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupGID(gid int) (user.Group, error) {
// first try a local system files lookup using existing capabilities
group, err := user.LookupGid(gid)
if err == nil {
return group, nil
}
// local files lookup failed; attempt to call `getent` to query configured group dbs
return getentGroup(fmt.Sprintf("%s %d", "group", gid))
}
func getentGroup(args string) (user.Group, error) {
reader, err := callGetent(args)
if err != nil {
return user.Group{}, err
}
groups, err := user.ParseGroup(reader)
if err != nil {
return user.Group{}, err
}
if len(groups) == 0 {
return user.Group{}, fmt.Errorf("getent failed to find groups entry for %q", strings.Split(args, " ")[1])
}
return groups[0], nil
}
func callGetent(args string) (io.Reader, error) {
entOnce.Do(func() { getentCmd, _ = resolveBinary("getent") })
// if no `getent` command on host, can't do anything else
if getentCmd == "" {
return nil, fmt.Errorf("")
}
out, err := execCmd(getentCmd, args)
if err != nil {
exitCode, errC := system.GetExitCode(err)
if errC != nil {
return nil, err
}
switch exitCode {
case 1:
return nil, fmt.Errorf("getent reported invalid parameters/database unknown")
case 2:
terms := strings.Split(args, " ")
return nil, fmt.Errorf("getent unable to find entry %q in %s database", terms[1], terms[0])
case 3:
return nil, fmt.Errorf("getent database doesn't support enumeration")
default:
return nil, err
}
}
return bytes.NewReader(out), nil
}

View file

@ -2,8 +2,6 @@ package idtools
import (
"fmt"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strconv"
@ -33,23 +31,6 @@ var (
userMod = "usermod"
)
func resolveBinary(binname string) (string, error) {
binaryPath, err := exec.LookPath(binname)
if err != nil {
return "", err
}
resolvedPath, err := filepath.EvalSymlinks(binaryPath)
if err != nil {
return "", err
}
//only return no error if the final resolved binary basename
//matches what was searched for
if filepath.Base(resolvedPath) == binname {
return resolvedPath, nil
}
return "", fmt.Errorf("Binary %q does not resolve to a binary of that name in $PATH (%q)", binname, resolvedPath)
}
// AddNamespaceRangesUser takes a username and uses the standard system
// utility to create a system user/group pair used to hold the
// /etc/sub{uid,gid} ranges which will be used for user namespace
@ -181,8 +162,3 @@ func wouldOverlap(arange subIDRange, ID int) bool {
}
return false
}
func execCmd(cmd, args string) ([]byte, error) {
execCmd := exec.Command(cmd, strings.Split(args, " ")...)
return execCmd.CombinedOutput()
}

View file

@ -0,0 +1,32 @@
// +build !windows
package idtools
import (
"fmt"
"os/exec"
"path/filepath"
"strings"
)
func resolveBinary(binname string) (string, error) {
binaryPath, err := exec.LookPath(binname)
if err != nil {
return "", err
}
resolvedPath, err := filepath.EvalSymlinks(binaryPath)
if err != nil {
return "", err
}
//only return no error if the final resolved binary basename
//matches what was searched for
if filepath.Base(resolvedPath) == binname {
return resolvedPath, nil
}
return "", fmt.Errorf("Binary %q does not resolve to a binary of that name in $PATH (%q)", binname, resolvedPath)
}
func execCmd(cmd, args string) ([]byte, error) {
execCmd := exec.Command(cmd, strings.Split(args, " ")...)
return execCmd.CombinedOutput()
}

View file

@ -155,18 +155,18 @@ func (r *multiReadSeeker) Read(b []byte) (int, error) {
r.pos = &pos{0, 0}
}
bCap := int64(cap(b))
bLen := int64(len(b))
buf := bytes.NewBuffer(nil)
var rdr io.ReadSeeker
for _, rdr = range r.readers[r.pos.idx:] {
readBytes, err := io.CopyN(buf, rdr, bCap)
readBytes, err := io.CopyN(buf, rdr, bLen)
if err != nil && err != io.EOF {
return -1, err
}
bCap -= readBytes
bLen -= readBytes
if bCap == 0 {
if bLen == 0 {
break
}
}

View file

@ -0,0 +1,58 @@
// +build solaris
package mount
// MakeShared ensures a mounted filesystem has the SHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeShared(mountPoint string) error {
return ensureMountedAs(mountPoint, "shared")
}
// MakeRShared ensures a mounted filesystem has the RSHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRShared(mountPoint string) error {
return ensureMountedAs(mountPoint, "rshared")
}
// MakePrivate ensures a mounted filesystem has the PRIVATE mount option enabled.
// See the supported options in flags.go for further reference.
func MakePrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, "private")
}
// MakeRPrivate ensures a mounted filesystem has the RPRIVATE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeRPrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, "rprivate")
}
// MakeSlave ensures a mounted filesystem has the SLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, "slave")
}
// MakeRSlave ensures a mounted filesystem has the RSLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, "rslave")
}
// MakeUnbindable ensures a mounted filesystem has the UNBINDABLE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, "unbindable")
}
// MakeRUnbindable ensures a mounted filesystem has the RUNBINDABLE mount
// option enabled. See the supported options in flags.go for further reference.
func MakeRUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, "runbindable")
}
func ensureMountedAs(mountPoint, options string) error {
// TODO: Solaris does not support bind mounts.
// Evaluate lofs and also look at the relevant
// mount flags to be supported.
return nil
}

View file

@ -1,4 +1,4 @@
package getter
package plugingetter
import "github.com/docker/docker/pkg/plugins"

View file

@ -19,8 +19,7 @@ const (
defaultTimeOut = 30
)
// NewClient creates a new plugin client (http).
func NewClient(addr string, tlsConfig *tlsconfig.Options) (*Client, error) {
func newTransport(addr string, tlsConfig *tlsconfig.Options) (transport.Transport, error) {
tr := &http.Transport{}
if tlsConfig != nil {
@ -45,15 +44,33 @@ func NewClient(addr string, tlsConfig *tlsconfig.Options) (*Client, error) {
}
scheme := httpScheme(u)
clientTransport := transport.NewHTTPTransport(tr, scheme, socket)
return NewClientWithTransport(clientTransport), nil
return transport.NewHTTPTransport(tr, scheme, socket), nil
}
// NewClientWithTransport creates a new plugin client with a given transport.
func NewClientWithTransport(tr transport.Transport) *Client {
// NewClient creates a new plugin client (http).
func NewClient(addr string, tlsConfig *tlsconfig.Options) (*Client, error) {
clientTransport, err := newTransport(addr, tlsConfig)
if err != nil {
return nil, err
}
return newClientWithTransport(clientTransport, 0), nil
}
// NewClientWithTimeout creates a new plugin client (http).
func NewClientWithTimeout(addr string, tlsConfig *tlsconfig.Options, timeoutInSecs int) (*Client, error) {
clientTransport, err := newTransport(addr, tlsConfig)
if err != nil {
return nil, err
}
return newClientWithTransport(clientTransport, timeoutInSecs), nil
}
// newClientWithTransport creates a new plugin client with a given transport.
func newClientWithTransport(tr transport.Transport, timeoutInSecs int) *Client {
return &Client{
http: &http.Client{
Transport: tr,
Timeout: time.Duration(timeoutInSecs) * time.Second,
},
requestFactory: tr,
}

View file

@ -41,9 +41,14 @@ type plugins struct {
plugins map[string]*Plugin
}
type extpointHandlers struct {
sync.RWMutex
extpointHandlers map[string][]func(string, *Client)
}
var (
storage = plugins{plugins: make(map[string]*Plugin)}
extpointHandlers = make(map[string]func(string, *Client))
storage = plugins{plugins: make(map[string]*Plugin)}
handlers = extpointHandlers{extpointHandlers: make(map[string][]func(string, *Client))}
)
// Manifest lists what a plugin implements.
@ -128,13 +133,17 @@ func (p *Plugin) activateWithLock() error {
p.Manifest = m
handlers.RLock()
for _, iface := range m.Implements {
handler, handled := extpointHandlers[iface]
hdlrs, handled := handlers.extpointHandlers[iface]
if !handled {
continue
}
handler(p.name, p.client)
for _, handler := range hdlrs {
handler(p.name, p.client)
}
}
handlers.RUnlock()
return nil
}
@ -226,7 +235,18 @@ func Get(name, imp string) (*Plugin, error) {
// Handle adds the specified function to the extpointHandlers.
func Handle(iface string, fn func(string, *Client)) {
extpointHandlers[iface] = fn
handlers.Lock()
hdlrs, ok := handlers.extpointHandlers[iface]
if !ok {
hdlrs = []func(string, *Client){}
}
hdlrs = append(hdlrs, fn)
handlers.extpointHandlers[iface] = hdlrs
for _, p := range storage.plugins {
p.activated = false
}
handlers.Unlock()
}
// GetAll returns all the plugins for the specified implementation

View file

@ -19,30 +19,26 @@ import (
var (
// BufioReader32KPool is a pool which returns bufio.Reader with a 32K buffer.
BufioReader32KPool *BufioReaderPool
BufioReader32KPool = newBufioReaderPoolWithSize(buffer32K)
// BufioWriter32KPool is a pool which returns bufio.Writer with a 32K buffer.
BufioWriter32KPool *BufioWriterPool
BufioWriter32KPool = newBufioWriterPoolWithSize(buffer32K)
)
const buffer32K = 32 * 1024
// BufioReaderPool is a bufio reader that uses sync.Pool.
type BufioReaderPool struct {
pool *sync.Pool
}
func init() {
BufioReader32KPool = newBufioReaderPoolWithSize(buffer32K)
BufioWriter32KPool = newBufioWriterPoolWithSize(buffer32K)
pool sync.Pool
}
// newBufioReaderPoolWithSize is unexported because new pools should be
// added here to be shared where required.
func newBufioReaderPoolWithSize(size int) *BufioReaderPool {
pool := &sync.Pool{
New: func() interface{} { return bufio.NewReaderSize(nil, size) },
return &BufioReaderPool{
pool: sync.Pool{
New: func() interface{} { return bufio.NewReaderSize(nil, size) },
},
}
return &BufioReaderPool{pool: pool}
}
// Get returns a bufio.Reader which reads from r. The buffer size is that of the pool.
@ -80,16 +76,17 @@ func (bufPool *BufioReaderPool) NewReadCloserWrapper(buf *bufio.Reader, r io.Rea
// BufioWriterPool is a bufio writer that uses sync.Pool.
type BufioWriterPool struct {
pool *sync.Pool
pool sync.Pool
}
// newBufioWriterPoolWithSize is unexported because new pools should be
// added here to be shared where required.
func newBufioWriterPoolWithSize(size int) *BufioWriterPool {
pool := &sync.Pool{
New: func() interface{} { return bufio.NewWriterSize(nil, size) },
return &BufioWriterPool{
pool: sync.Pool{
New: func() interface{} { return bufio.NewWriterSize(nil, size) },
},
}
return &BufioWriterPool{pool: pool}
}
// Get returns a bufio.Writer which writes to w. The buffer size is that of the pool.

View file

@ -1,4 +1,4 @@
## reexec
# reexec
The `reexec` package facilitates the busybox style reexec of the docker binary that we require because
of the forking limitations of using Go. Handlers can be registered with a name and the argv 0 of

View file

@ -0,0 +1 @@
This package provides helper functions for dealing with strings

View file

@ -0,0 +1,101 @@
// Package stringutils provides helper functions for dealing with strings.
package stringutils
import (
"bytes"
"math/rand"
"strings"
"github.com/docker/docker/pkg/random"
)
// GenerateRandomAlphaOnlyString generates an alphabetical random string with length n.
func GenerateRandomAlphaOnlyString(n int) string {
// make a really long string
letters := []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
b := make([]byte, n)
for i := range b {
b[i] = letters[random.Rand.Intn(len(letters))]
}
return string(b)
}
// GenerateRandomASCIIString generates an ASCII random string with length n.
func GenerateRandomASCIIString(n int) string {
chars := "abcdefghijklmnopqrstuvwxyz" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
"~!@#$%^&*()-_+={}[]\\|<,>.?/\"';:` "
res := make([]byte, n)
for i := 0; i < n; i++ {
res[i] = chars[rand.Intn(len(chars))]
}
return string(res)
}
// Ellipsis truncates a string to fit within maxlen, and appends ellipsis (...).
// For maxlen of 3 and lower, no ellipsis is appended.
func Ellipsis(s string, maxlen int) string {
r := []rune(s)
if len(r) <= maxlen {
return s
}
if maxlen <= 3 {
return string(r[:maxlen])
}
return string(r[:maxlen-3]) + "..."
}
// Truncate truncates a string to maxlen.
func Truncate(s string, maxlen int) string {
r := []rune(s)
if len(r) <= maxlen {
return s
}
return string(r[:maxlen])
}
// InSlice tests whether a string is contained in a slice of strings or not.
// Comparison is case insensitive
func InSlice(slice []string, s string) bool {
for _, ss := range slice {
if strings.ToLower(s) == strings.ToLower(ss) {
return true
}
}
return false
}
func quote(word string, buf *bytes.Buffer) {
// Bail out early for "simple" strings
if word != "" && !strings.ContainsAny(word, "\\'\"`${[|&;<>()~*?! \t\n") {
buf.WriteString(word)
return
}
buf.WriteString("'")
for i := 0; i < len(word); i++ {
b := word[i]
if b == '\'' {
// Replace literal ' with a close ', a \', and a open '
buf.WriteString("'\\''")
} else {
buf.WriteByte(b)
}
}
buf.WriteString("'")
}
// ShellQuoteArguments takes a list of strings and escapes them so they will be
// handled right when passed as arguments to a program via a shell
func ShellQuoteArguments(args []string) string {
var buf bytes.Buffer
for i, arg := range args {
if i != 0 {
buf.WriteByte(' ')
}
quote(arg, &buf)
}
return buf.String()
}

View file

@ -6,6 +6,8 @@ package system
import (
"syscall"
"unsafe"
"golang.org/x/sys/windows"
)
var (
@ -67,7 +69,7 @@ func PulseEvent(handle syscall.Handle) (err error) {
return setResetPulse(handle, procPulseEvent)
}
func setResetPulse(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
func setResetPulse(handle syscall.Handle, proc *windows.LazyProc) (err error) {
r0, _, _ := proc.Call(uintptr(handle))
if r0 != 0 {
err = syscall.Errno(r0)

View file

@ -0,0 +1,33 @@
package system
import (
"fmt"
"os/exec"
"syscall"
)
// GetExitCode returns the ExitStatus of the specified error if its type is
// exec.ExitError, returns 0 and an error otherwise.
func GetExitCode(err error) (int, error) {
exitCode := 0
if exiterr, ok := err.(*exec.ExitError); ok {
if procExit, ok := exiterr.Sys().(syscall.WaitStatus); ok {
return procExit.ExitStatus(), nil
}
}
return exitCode, fmt.Errorf("failed to get exit code")
}
// ProcessExitCode process the specified error and returns the exit status code
// if the error was of type exec.ExitError, returns nothing otherwise.
func ProcessExitCode(err error) (exitCode int) {
if err != nil {
var exiterr error
if exitCode, exiterr = GetExitCode(err); exiterr != nil {
// TODO: Fix this so we check the error's text.
// we've failed to retrieve exit code, so we set it to 127
exitCode = 127
}
}
return
}

View file

@ -7,6 +7,12 @@ import (
"path/filepath"
)
// MkdirAllWithACL is a wrapper for MkdirAll that creates a directory
// ACL'd for Builtin Administrators and Local System.
func MkdirAllWithACL(path string, perm os.FileMode) error {
return MkdirAll(path, perm)
}
// MkdirAll creates a directory named path along with any necessary parents,
// with permission specified by attribute perm for all dir created.
func MkdirAll(path string, perm os.FileMode) error {
@ -17,3 +23,32 @@ func MkdirAll(path string, perm os.FileMode) error {
func IsAbs(path string) bool {
return filepath.IsAbs(path)
}
// The functions below here are wrappers for the equivalents in the os package.
// They are passthrough on Unix platforms, and only relevant on Windows.
// CreateSequential creates the named file with mode 0666 (before umask), truncating
// it if it already exists. If successful, methods on the returned
// File can be used for I/O; the associated file descriptor has mode
// O_RDWR.
// If there is an error, it will be of type *PathError.
func CreateSequential(name string) (*os.File, error) {
return os.Create(name)
}
// OpenSequential opens the named file for reading. If successful, methods on
// the returned file can be used for reading; the associated file
// descriptor has mode O_RDONLY.
// If there is an error, it will be of type *PathError.
func OpenSequential(name string) (*os.File, error) {
return os.Open(name)
}
// OpenFileSequential is the generalized open call; most users will use Open
// or Create instead. It opens the named file with specified flag
// (O_RDONLY etc.) and perm, (0666 etc.) if applicable. If successful,
// methods on the returned File can be used for I/O.
// If there is an error, it will be of type *PathError.
func OpenFileSequential(name string, flag int, perm os.FileMode) (*os.File, error) {
return os.OpenFile(name, flag, perm)
}

View file

@ -8,15 +8,31 @@ import (
"regexp"
"strings"
"syscall"
"unsafe"
winio "github.com/Microsoft/go-winio"
)
// MkdirAllWithACL is a wrapper for MkdirAll that creates a directory
// ACL'd for Builtin Administrators and Local System.
func MkdirAllWithACL(path string, perm os.FileMode) error {
return mkdirall(path, true)
}
// MkdirAll implementation that is volume path aware for Windows.
func MkdirAll(path string, perm os.FileMode) error {
func MkdirAll(path string, _ os.FileMode) error {
return mkdirall(path, false)
}
// mkdirall is a custom version of os.MkdirAll modified for use on Windows
// so that it is both volume path aware, and can create a directory with
// a DACL.
func mkdirall(path string, adminAndLocalSystem bool) error {
if re := regexp.MustCompile(`^\\\\\?\\Volume{[a-z0-9-]+}$`); re.MatchString(path) {
return nil
}
// The rest of this method is copied from os.MkdirAll and should be kept
// The rest of this method is largely copied from os.MkdirAll and should be kept
// as-is to ensure compatibility.
// Fast path: if we can tell whether path is a directory or file, stop with success or error.
@ -45,14 +61,19 @@ func MkdirAll(path string, perm os.FileMode) error {
if j > 1 {
// Create parent
err = MkdirAll(path[0:j-1], perm)
err = mkdirall(path[0:j-1], false)
if err != nil {
return err
}
}
// Parent now exists; invoke Mkdir and use its result.
err = os.Mkdir(path, perm)
// Parent now exists; invoke os.Mkdir or mkdirWithACL and use its result.
if adminAndLocalSystem {
err = mkdirWithACL(path)
} else {
err = os.Mkdir(path, 0)
}
if err != nil {
// Handle arguments like "foo/." by
// double-checking that directory doesn't exist.
@ -65,6 +86,36 @@ func MkdirAll(path string, perm os.FileMode) error {
return nil
}
// mkdirWithACL creates a new directory. If there is an error, it will be of
// type *PathError. .
//
// This is a modified and combined version of os.Mkdir and syscall.Mkdir
// in golang to cater for creating a directory am ACL permitting full
// access, with inheritance, to any subfolder/file for Built-in Administrators
// and Local System.
func mkdirWithACL(name string) error {
sa := syscall.SecurityAttributes{Length: 0}
sddl := "D:P(A;OICI;GA;;;BA)(A;OICI;GA;;;SY)"
sd, err := winio.SddlToSecurityDescriptor(sddl)
if err != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: err}
}
sa.Length = uint32(unsafe.Sizeof(sa))
sa.InheritHandle = 1
sa.SecurityDescriptor = uintptr(unsafe.Pointer(&sd[0]))
namep, err := syscall.UTF16PtrFromString(name)
if err != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: err}
}
e := syscall.CreateDirectory(namep, &sa)
if e != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: e}
}
return nil
}
// IsAbs is a platform-specific wrapper for filepath.IsAbs. On Windows,
// golang filepath.IsAbs does not consider a path \windows\system32 as absolute
// as it doesn't start with a drive-letter/colon combination. However, in
@ -80,3 +131,106 @@ func IsAbs(path string) bool {
}
return true
}
// The origin of the functions below here are the golang OS and syscall packages,
// slightly modified to only cope with files, not directories due to the
// specific use case.
//
// The alteration is to allow a file on Windows to be opened with
// FILE_FLAG_SEQUENTIAL_SCAN (particular for docker load), to avoid eating
// the standby list, particularly when accessing large files such as layer.tar.
// CreateSequential creates the named file with mode 0666 (before umask), truncating
// it if it already exists. If successful, methods on the returned
// File can be used for I/O; the associated file descriptor has mode
// O_RDWR.
// If there is an error, it will be of type *PathError.
func CreateSequential(name string) (*os.File, error) {
return OpenFileSequential(name, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0)
}
// OpenSequential opens the named file for reading. If successful, methods on
// the returned file can be used for reading; the associated file
// descriptor has mode O_RDONLY.
// If there is an error, it will be of type *PathError.
func OpenSequential(name string) (*os.File, error) {
return OpenFileSequential(name, os.O_RDONLY, 0)
}
// OpenFileSequential is the generalized open call; most users will use Open
// or Create instead.
// If there is an error, it will be of type *PathError.
func OpenFileSequential(name string, flag int, _ os.FileMode) (*os.File, error) {
if name == "" {
return nil, &os.PathError{Op: "open", Path: name, Err: syscall.ENOENT}
}
r, errf := syscallOpenFileSequential(name, flag, 0)
if errf == nil {
return r, nil
}
return nil, &os.PathError{Op: "open", Path: name, Err: errf}
}
func syscallOpenFileSequential(name string, flag int, _ os.FileMode) (file *os.File, err error) {
r, e := syscallOpenSequential(name, flag|syscall.O_CLOEXEC, 0)
if e != nil {
return nil, e
}
return os.NewFile(uintptr(r), name), nil
}
func makeInheritSa() *syscall.SecurityAttributes {
var sa syscall.SecurityAttributes
sa.Length = uint32(unsafe.Sizeof(sa))
sa.InheritHandle = 1
return &sa
}
func syscallOpenSequential(path string, mode int, _ uint32) (fd syscall.Handle, err error) {
if len(path) == 0 {
return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return syscall.InvalidHandle, err
}
var access uint32
switch mode & (syscall.O_RDONLY | syscall.O_WRONLY | syscall.O_RDWR) {
case syscall.O_RDONLY:
access = syscall.GENERIC_READ
case syscall.O_WRONLY:
access = syscall.GENERIC_WRITE
case syscall.O_RDWR:
access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
}
if mode&syscall.O_CREAT != 0 {
access |= syscall.GENERIC_WRITE
}
if mode&syscall.O_APPEND != 0 {
access &^= syscall.GENERIC_WRITE
access |= syscall.FILE_APPEND_DATA
}
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE)
var sa *syscall.SecurityAttributes
if mode&syscall.O_CLOEXEC == 0 {
sa = makeInheritSa()
}
var createmode uint32
switch {
case mode&(syscall.O_CREAT|syscall.O_EXCL) == (syscall.O_CREAT | syscall.O_EXCL):
createmode = syscall.CREATE_NEW
case mode&(syscall.O_CREAT|syscall.O_TRUNC) == (syscall.O_CREAT | syscall.O_TRUNC):
createmode = syscall.CREATE_ALWAYS
case mode&syscall.O_CREAT == syscall.O_CREAT:
createmode = syscall.OPEN_ALWAYS
case mode&syscall.O_TRUNC == syscall.O_TRUNC:
createmode = syscall.TRUNCATE_EXISTING
default:
createmode = syscall.OPEN_EXISTING
}
// Use FILE_FLAG_SEQUENTIAL_SCAN rather than FILE_ATTRIBUTE_NORMAL as implemented in golang.
//https://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
const fileFlagSequentialScan = 0x08000000 // FILE_FLAG_SEQUENTIAL_SCAN
h, e := syscall.CreateFile(pathp, access, sharemode, sa, createmode, fileFlagSequentialScan, 0)
return h, e
}

View file

@ -89,7 +89,7 @@ func ReadMemInfo() (*MemInfo, error) {
if ppKernel < 0 || MemTotal < 0 || MemFree < 0 || SwapTotal < 0 ||
SwapFree < 0 {
return nil, fmt.Errorf("Error getting system memory info %v\n", err)
return nil, fmt.Errorf("error getting system memory info %v\n", err)
}
meminfo := &MemInfo{}

View file

@ -1,12 +1,13 @@
package system
import (
"syscall"
"unsafe"
"golang.org/x/sys/windows"
)
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
)

View file

@ -53,6 +53,8 @@ func GetOSVersion() OSVersion {
}
// IsWindowsClient returns true if the SKU is client
// @engine maintainers - this function should not be removed or modified as it
// is used to enforce licensing restrictions on Windows.
func IsWindowsClient() bool {
osviex := &osVersionInfoEx{OSVersionInfoSize: 284}
r1, _, err := procGetVersionExW.Call(uintptr(unsafe.Pointer(osviex)))

View file

@ -70,6 +70,11 @@ func ParseNamed(s string) (Named, error) {
return r, nil
}
// TrimNamed removes any tag or digest from the named reference
func TrimNamed(ref Named) Named {
return &namedRef{distreference.TrimNamed(ref)}
}
// WithName returns a named object representing the given string. If the input
// is invalid ErrReferenceInvalidFormat will be returned.
func WithName(name string) (Named, error) {

View file

@ -0,0 +1,22 @@
Copyright (c) 2015 Matthew Heon <mheon@redhat.com>
Copyright (c) 2015 Paul Moore <pmoore@redhat.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,26 @@
libseccomp-golang: Go Language Bindings for the libseccomp Project
===============================================================================
https://github.com/seccomp/libseccomp-golang
https://github.com/seccomp/libseccomp
The libseccomp library provides an easy to use, platform independent, interface
to the Linux Kernel's syscall filtering mechanism. The libseccomp API is
designed to abstract away the underlying BPF based syscall filter language and
present a more conventional function-call based filtering interface that should
be familiar to, and easily adopted by, application developers.
The libseccomp-golang library provides a Go based interface to the libseccomp
library.
* Online Resources
The library source repository currently lives on GitHub at the following URLs:
-> https://github.com/seccomp/libseccomp-golang
-> https://github.com/seccomp/libseccomp
The project mailing list is currently hosted on Google Groups at the URL below,
please note that a Google account is not required to subscribe to the mailing
list.
-> https://groups.google.com/d/forum/libseccomp

View file

@ -0,0 +1,857 @@
// +build linux
// Public API specification for libseccomp Go bindings
// Contains public API for the bindings
// Package seccomp provides bindings for libseccomp, a library wrapping the Linux
// seccomp syscall. Seccomp enables an application to restrict system call use
// for itself and its children.
package seccomp
import (
"fmt"
"os"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
)
// C wrapping code
// #cgo pkg-config: libseccomp
// #include <stdlib.h>
// #include <seccomp.h>
import "C"
// Exported types
// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
// per-architecture basis.
type ScmpArch uint
// ScmpAction represents an action to be taken on a filter rule match in
// libseccomp
type ScmpAction uint
// ScmpCompareOp represents a comparison operator which can be used in a filter
// rule
type ScmpCompareOp uint
// ScmpCondition represents a rule in a libseccomp filter context
type ScmpCondition struct {
Argument uint `json:"argument,omitempty"`
Op ScmpCompareOp `json:"operator,omitempty"`
Operand1 uint64 `json:"operand_one,omitempty"`
Operand2 uint64 `json:"operand_two,omitempty"`
}
// ScmpSyscall represents a Linux System Call
type ScmpSyscall int32
// Exported Constants
const (
// Valid architectures recognized by libseccomp
// ARM64 and all MIPS architectures are unsupported by versions of the
// library before v2.2 and will return errors if used
// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
// variables are invalid
ArchInvalid ScmpArch = iota
// ArchNative is the native architecture of the kernel
ArchNative ScmpArch = iota
// ArchX86 represents 32-bit x86 syscalls
ArchX86 ScmpArch = iota
// ArchAMD64 represents 64-bit x86-64 syscalls
ArchAMD64 ScmpArch = iota
// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
ArchX32 ScmpArch = iota
// ArchARM represents 32-bit ARM syscalls
ArchARM ScmpArch = iota
// ArchARM64 represents 64-bit ARM syscalls
ArchARM64 ScmpArch = iota
// ArchMIPS represents 32-bit MIPS syscalls
ArchMIPS ScmpArch = iota
// ArchMIPS64 represents 64-bit MIPS syscalls
ArchMIPS64 ScmpArch = iota
// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
ArchMIPS64N32 ScmpArch = iota
// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
ArchMIPSEL ScmpArch = iota
// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
ArchMIPSEL64 ScmpArch = iota
// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
// 32-bit pointers)
ArchMIPSEL64N32 ScmpArch = iota
// ArchPPC represents 32-bit POWERPC syscalls
ArchPPC ScmpArch = iota
// ArchPPC64 represents 64-bit POWER syscalls (big endian)
ArchPPC64 ScmpArch = iota
// ArchPPC64LE represents 64-bit POWER syscalls (little endian)
ArchPPC64LE ScmpArch = iota
// ArchS390 represents 31-bit System z/390 syscalls
ArchS390 ScmpArch = iota
// ArchS390X represents 64-bit System z/390 syscalls
ArchS390X ScmpArch = iota
)
const (
// Supported actions on filter match
// ActInvalid is a placeholder to ensure uninitialized ScmpAction
// variables are invalid
ActInvalid ScmpAction = iota
// ActKill kills the process
ActKill ScmpAction = iota
// ActTrap throws SIGSYS
ActTrap ScmpAction = iota
// ActErrno causes the syscall to return a negative error code. This
// code can be set with the SetReturnCode method
ActErrno ScmpAction = iota
// ActTrace causes the syscall to notify tracing processes with the
// given error code. This code can be set with the SetReturnCode method
ActTrace ScmpAction = iota
// ActAllow permits the syscall to continue execution
ActAllow ScmpAction = iota
)
const (
// These are comparison operators used in conditional seccomp rules
// They are used to compare the value of a single argument of a syscall
// against a user-defined constant
// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
// variables are invalid
CompareInvalid ScmpCompareOp = iota
// CompareNotEqual returns true if the argument is not equal to the
// given value
CompareNotEqual ScmpCompareOp = iota
// CompareLess returns true if the argument is less than the given value
CompareLess ScmpCompareOp = iota
// CompareLessOrEqual returns true if the argument is less than or equal
// to the given value
CompareLessOrEqual ScmpCompareOp = iota
// CompareEqual returns true if the argument is equal to the given value
CompareEqual ScmpCompareOp = iota
// CompareGreaterEqual returns true if the argument is greater than or
// equal to the given value
CompareGreaterEqual ScmpCompareOp = iota
// CompareGreater returns true if the argument is greater than the given
// value
CompareGreater ScmpCompareOp = iota
// CompareMaskedEqual returns true if the argument is equal to the given
// value, when masked (bitwise &) against the second given value
CompareMaskedEqual ScmpCompareOp = iota
)
// Helpers for types
// GetArchFromString returns an ScmpArch constant from a string representing an
// architecture
func GetArchFromString(arch string) (ScmpArch, error) {
switch strings.ToLower(arch) {
case "x86":
return ArchX86, nil
case "amd64", "x86-64", "x86_64", "x64":
return ArchAMD64, nil
case "x32":
return ArchX32, nil
case "arm":
return ArchARM, nil
case "arm64", "aarch64":
return ArchARM64, nil
case "mips":
return ArchMIPS, nil
case "mips64":
return ArchMIPS64, nil
case "mips64n32":
return ArchMIPS64N32, nil
case "mipsel":
return ArchMIPSEL, nil
case "mipsel64":
return ArchMIPSEL64, nil
case "mipsel64n32":
return ArchMIPSEL64N32, nil
case "ppc":
return ArchPPC, nil
case "ppc64":
return ArchPPC64, nil
case "ppc64le":
return ArchPPC64LE, nil
case "s390":
return ArchS390, nil
case "s390x":
return ArchS390X, nil
default:
return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch)
}
}
// String returns a string representation of an architecture constant
func (a ScmpArch) String() string {
switch a {
case ArchX86:
return "x86"
case ArchAMD64:
return "amd64"
case ArchX32:
return "x32"
case ArchARM:
return "arm"
case ArchARM64:
return "arm64"
case ArchMIPS:
return "mips"
case ArchMIPS64:
return "mips64"
case ArchMIPS64N32:
return "mips64n32"
case ArchMIPSEL:
return "mipsel"
case ArchMIPSEL64:
return "mipsel64"
case ArchMIPSEL64N32:
return "mipsel64n32"
case ArchPPC:
return "ppc"
case ArchPPC64:
return "ppc64"
case ArchPPC64LE:
return "ppc64le"
case ArchS390:
return "s390"
case ArchS390X:
return "s390x"
case ArchNative:
return "native"
case ArchInvalid:
return "Invalid architecture"
default:
return "Unknown architecture"
}
}
// String returns a string representation of a comparison operator constant
func (a ScmpCompareOp) String() string {
switch a {
case CompareNotEqual:
return "Not equal"
case CompareLess:
return "Less than"
case CompareLessOrEqual:
return "Less than or equal to"
case CompareEqual:
return "Equal"
case CompareGreaterEqual:
return "Greater than or equal to"
case CompareGreater:
return "Greater than"
case CompareMaskedEqual:
return "Masked equality"
case CompareInvalid:
return "Invalid comparison operator"
default:
return "Unrecognized comparison operator"
}
}
// String returns a string representation of a seccomp match action
func (a ScmpAction) String() string {
switch a & 0xFFFF {
case ActKill:
return "Action: Kill Process"
case ActTrap:
return "Action: Send SIGSYS"
case ActErrno:
return fmt.Sprintf("Action: Return error code %d", (a >> 16))
case ActTrace:
return fmt.Sprintf("Action: Notify tracing processes with code %d",
(a >> 16))
case ActAllow:
return "Action: Allow system call"
default:
return "Unrecognized Action"
}
}
// SetReturnCode adds a return code to a supporting ScmpAction, clearing any
// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
// Accepts 16-bit return code as argument.
// Returns a valid ScmpAction of the original type with the new error code set.
func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
aTmp := a & 0x0000FFFF
if aTmp == ActErrno || aTmp == ActTrace {
return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
}
return a
}
// GetReturnCode returns the return code of an ScmpAction
func (a ScmpAction) GetReturnCode() int16 {
return int16(a >> 16)
}
// General utility functions
// GetLibraryVersion returns the version of the library the bindings are built
// against.
// The version is formatted as follows: Major.Minor.Micro
func GetLibraryVersion() (major, minor, micro int) {
return verMajor, verMinor, verMicro
}
// Syscall functions
// GetName retrieves the name of a syscall from its number.
// Acts on any syscall number.
// Returns either a string containing the name of the syscall, or an error.
func (s ScmpSyscall) GetName() (string, error) {
return s.GetNameByArch(ArchNative)
}
// GetNameByArch retrieves the name of a syscall from its number for a given
// architecture.
// Acts on any syscall number.
// Accepts a valid architecture constant.
// Returns either a string containing the name of the syscall, or an error.
// if the syscall is unrecognized or an issue occurred.
func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
if err := sanitizeArch(arch); err != nil {
return "", err
}
cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
if cString == nil {
return "", fmt.Errorf("could not resolve syscall name")
}
defer C.free(unsafe.Pointer(cString))
finalStr := C.GoString(cString)
return finalStr, nil
}
// GetSyscallFromName returns the number of a syscall by name on the kernel's
// native architecture.
// Accepts a string containing the name of a syscall.
// Returns the number of the syscall, or an error if no syscall with that name
// was found.
func GetSyscallFromName(name string) (ScmpSyscall, error) {
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name(cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// GetSyscallFromNameByArch returns the number of a syscall by name for a given
// architecture's ABI.
// Accepts the name of a syscall and an architecture constant.
// Returns the number of the syscall, or an error if an invalid architecture is
// passed or a syscall with that name was not found.
func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
if err := sanitizeArch(arch); err != nil {
return 0, err
}
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// MakeCondition creates and returns a new condition to attach to a filter rule.
// Associated rules will only match if this condition is true.
// Accepts the number the argument we are checking, and a comparison operator
// and value to compare to.
// The rule will match if argument $arg (zero-indexed) of the syscall is
// $COMPARE_OP the provided comparison value.
// Some comparison operators accept two values. Masked equals, for example,
// will mask $arg of the syscall with the second value provided (via bitwise
// AND) and then compare against the first value provided.
// For example, in the less than or equal case, if the syscall argument was
// 0 and the value provided was 1, the condition would match, as 0 is less
// than or equal to 1.
// Return either an error on bad argument or a valid ScmpCondition struct.
func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
var condStruct ScmpCondition
if comparison == CompareInvalid {
return condStruct, fmt.Errorf("invalid comparison operator")
} else if arg > 5 {
return condStruct, fmt.Errorf("syscalls only have up to 6 arguments")
} else if len(values) > 2 {
return condStruct, fmt.Errorf("conditions can have at most 2 arguments")
} else if len(values) == 0 {
return condStruct, fmt.Errorf("must provide at least one value to compare against")
}
condStruct.Argument = arg
condStruct.Op = comparison
condStruct.Operand1 = values[0]
if len(values) == 2 {
condStruct.Operand2 = values[1]
} else {
condStruct.Operand2 = 0 // Unused
}
return condStruct, nil
}
// Utility Functions
// GetNativeArch returns architecture token representing the native kernel
// architecture
func GetNativeArch() (ScmpArch, error) {
arch := C.seccomp_arch_native()
return archFromNative(arch)
}
// Public Filter API
// ScmpFilter represents a filter context in libseccomp.
// A filter context is initially empty. Rules can be added to it, and it can
// then be loaded into the kernel.
type ScmpFilter struct {
filterCtx C.scmp_filter_ctx
valid bool
lock sync.Mutex
}
// NewFilter creates and returns a new filter context.
// Accepts a default action to be taken for syscalls which match no rules in
// the filter.
// Returns a reference to a valid filter context, or nil and an error if the
// filter context could not be created or an invalid default action was given.
func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
if err := sanitizeAction(defaultAction); err != nil {
return nil, err
}
fPtr := C.seccomp_init(defaultAction.toNative())
if fPtr == nil {
return nil, fmt.Errorf("could not create filter")
}
filter := new(ScmpFilter)
filter.filterCtx = fPtr
filter.valid = true
runtime.SetFinalizer(filter, filterFinalizer)
return filter, nil
}
// IsValid determines whether a filter context is valid to use.
// Some operations (Release and Merge) render filter contexts invalid and
// consequently prevent further use.
func (f *ScmpFilter) IsValid() bool {
f.lock.Lock()
defer f.lock.Unlock()
return f.valid
}
// Reset resets a filter context, removing all its existing state.
// Accepts a new default action to be taken for syscalls which do not match.
// Returns an error if the filter or action provided are invalid.
func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeAction(defaultAction); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative())
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Release releases a filter context, freeing its memory. Should be called after
// loading into the kernel, when the filter is no longer needed.
// After calling this function, the given filter is no longer valid and cannot
// be used.
// Release() will be invoked automatically when a filter context is garbage
// collected, but can also be called manually to free memory.
func (f *ScmpFilter) Release() {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return
}
f.valid = false
C.seccomp_release(f.filterCtx)
}
// Merge merges two filter contexts.
// The source filter src will be released as part of the process, and will no
// longer be usable or valid after this call.
// To be merged, filters must NOT share any architectures, and all their
// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools)
// must match.
// The filter src will be merged into the filter this is called on.
// The architectures of the src filter not present in the destination, and all
// associated rules, will be added to the destination.
// Returns an error if merging the filters failed.
func (f *ScmpFilter) Merge(src *ScmpFilter) error {
f.lock.Lock()
defer f.lock.Unlock()
src.lock.Lock()
defer src.lock.Unlock()
if !src.valid || !f.valid {
return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
}
// Merge the filters
retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
if syscall.Errno(-1*retCode) == syscall.EINVAL {
return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
src.valid = false
return nil
}
// IsArchPresent checks if an architecture is present in a filter.
// If a filter contains an architecture, it uses its default action for
// syscalls which do not match rules in it, and its rules can match syscalls
// for that ABI.
// If a filter does not contain an architecture, all syscalls made to that
// kernel ABI will fail with the filter's default Bad Architecture Action
// (by default, killing the process).
// Accepts an architecture constant.
// Returns true if the architecture is present in the filter, false otherwise,
// and an error on an invalid filter context, architecture constant, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return false, err
} else if !f.valid {
return false, errBadFilter
}
retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative())
if syscall.Errno(-1*retCode) == syscall.EEXIST {
// -EEXIST is "arch not present"
return false, nil
} else if retCode != 0 {
return false, syscall.Errno(-1 * retCode)
}
return true, nil
}
// AddArch adds an architecture to the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) AddArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Libseccomp returns -EEXIST if the specified architecture is already
// present. Succeed silently in this case, as it's not fatal, and the
// architecture is present already.
retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// RemoveArch removes an architecture from the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Similar to AddArch, -EEXIST is returned if the arch is not present
// Succeed silently in that case, this is not fatal and the architecture
// is not present in the filter after RemoveArch
retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Load loads a filter context into the kernel.
// Returns an error if the filter context is invalid or the syscall failed.
func (f *ScmpFilter) Load() error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_load(f.filterCtx); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// GetDefaultAction returns the default action taken on a syscall which does not
// match a rule in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActDefault)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetBadArchAction returns the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActBadArch)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set
// to on the filter being loaded, or an error if an issue was encountered
// retrieving the value.
// The No New Privileges bit tells the kernel that new processes run with exec()
// cannot gain more privileges than the process that ran exec().
// For example, a process with No New Privileges set would be unable to exec
// setuid/setgid executables.
func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
noNewPrivs, err := f.getFilterAttr(filterAttrNNP)
if err != nil {
return false, err
}
if noNewPrivs == 0 {
return false, nil
}
return true, nil
}
// GetTsyncBit returns whether Thread Synchronization will be enabled on the
// filter being loaded, or an error if an issue was encountered retrieving the
// value.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) GetTsyncBit() (bool, error) {
tSync, err := f.getFilterAttr(filterAttrTsync)
if err != nil {
return false, err
}
if tSync == 0 {
return false, nil
}
return true, nil
}
// SetBadArchAction sets the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// setting the value.
func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
if err := sanitizeAction(action); err != nil {
return err
}
return f.setFilterAttr(filterAttrActBadArch, action.toNative())
}
// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
// applied on filter load, or an error if an issue was encountered setting the
// value.
// Filters with No New Privileges set to 0 can only be loaded if the process
// has the CAP_SYS_ADMIN capability.
func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
var toSet C.uint32_t = 0x0
if state {
toSet = 0x1
}
return f.setFilterAttr(filterAttrNNP, toSet)
}
// SetTsync sets whether Thread Synchronization will be enabled on the filter
// being loaded. Returns an error if setting Tsync failed, or the filter is
// invalid.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) SetTsync(enable bool) error {
var toSet C.uint32_t = 0x0
if enable {
toSet = 0x1
}
return f.setFilterAttr(filterAttrTsync, toSet)
}
// SetSyscallPriority sets a syscall's priority.
// This provides a hint to the filter generator in libseccomp about the
// importance of this syscall. High-priority syscalls are placed
// first in the filter code, and incur less overhead (at the expense of
// lower-priority syscalls).
func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call),
C.uint8_t(priority)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// AddRule adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, false, nil)
}
// AddRuleExact adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, true, nil)
}
// AddRuleConditional adds a single rule for a conditional action on a syscall.
// Returns an error if an issue was encountered adding the rule.
// All conditions must match for the rule to match.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditional is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, false, conds)
}
// AddRuleConditionalExact adds a single rule for a conditional action on a
// syscall.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, true, conds)
}
// ExportPFC output PFC-formatted, human-readable dump of a filter context's
// rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportPFC(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a
// filter context's rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportBPF(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}

View file

@ -0,0 +1,506 @@
// +build linux
// Internal functions for libseccomp Go bindings
// No exported functions
package seccomp
import (
"fmt"
"os"
"syscall"
)
// Unexported C wrapping code - provides the C-Golang interface
// Get the seccomp header in scope
// Need stdlib.h for free() on cstrings
// #cgo pkg-config: libseccomp
/*
#include <stdlib.h>
#include <seccomp.h>
#if SCMP_VER_MAJOR < 2
#error Minimum supported version of Libseccomp is v2.1.0
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
#error Minimum supported version of Libseccomp is v2.1.0
#endif
#define ARCH_BAD ~0
const uint32_t C_ARCH_BAD = ARCH_BAD;
#ifndef SCMP_ARCH_AARCH64
#define SCMP_ARCH_AARCH64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS
#define SCMP_ARCH_MIPS ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64
#define SCMP_ARCH_MIPS64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64N32
#define SCMP_ARCH_MIPS64N32 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL
#define SCMP_ARCH_MIPSEL ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64
#define SCMP_ARCH_MIPSEL64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64N32
#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC
#define SCMP_ARCH_PPC ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC64
#define SCMP_ARCH_PPC64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC64LE
#define SCMP_ARCH_PPC64LE ARCH_BAD
#endif
#ifndef SCMP_ARCH_S390
#define SCMP_ARCH_S390 ARCH_BAD
#endif
#ifndef SCMP_ARCH_S390X
#define SCMP_ARCH_S390X ARCH_BAD
#endif
const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC;
const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64;
const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE;
const uint32_t C_ARCH_S390 = SCMP_ARCH_S390;
const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X;
const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
// If TSync is not supported, make sure it doesn't map to a supported filter attribute
// Don't worry about major version < 2, the minimum version checks should catch that case
#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
#endif
const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
const int C_CMP_NE = (int)SCMP_CMP_NE;
const int C_CMP_LT = (int)SCMP_CMP_LT;
const int C_CMP_LE = (int)SCMP_CMP_LE;
const int C_CMP_EQ = (int)SCMP_CMP_EQ;
const int C_CMP_GE = (int)SCMP_CMP_GE;
const int C_CMP_GT = (int)SCMP_CMP_GT;
const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ;
const int C_VERSION_MAJOR = SCMP_VER_MAJOR;
const int C_VERSION_MINOR = SCMP_VER_MINOR;
const int C_VERSION_MICRO = SCMP_VER_MICRO;
typedef struct scmp_arg_cmp* scmp_cast_t;
// Wrapper to create an scmp_arg_cmp struct
void*
make_struct_arg_cmp(
unsigned int arg,
int compare,
uint64_t a,
uint64_t b
)
{
struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp));
s->arg = arg;
s->op = compare;
s->datum_a = a;
s->datum_b = b;
return s;
}
*/
import "C"
// Nonexported types
type scmpFilterAttr uint32
// Nonexported constants
const (
filterAttrActDefault scmpFilterAttr = iota
filterAttrActBadArch scmpFilterAttr = iota
filterAttrNNP scmpFilterAttr = iota
filterAttrTsync scmpFilterAttr = iota
)
const (
// An error return from certain libseccomp functions
scmpError C.int = -1
// Comparison boundaries to check for architecture validity
archStart ScmpArch = ArchNative
archEnd ScmpArch = ArchS390X
// Comparison boundaries to check for action validity
actionStart ScmpAction = ActKill
actionEnd ScmpAction = ActAllow
// Comparison boundaries to check for comparison operator validity
compareOpStart ScmpCompareOp = CompareNotEqual
compareOpEnd ScmpCompareOp = CompareMaskedEqual
)
var (
// Error thrown on bad filter context
errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
// Constants representing library major, minor, and micro versions
verMajor = int(C.C_VERSION_MAJOR)
verMinor = int(C.C_VERSION_MINOR)
verMicro = int(C.C_VERSION_MICRO)
)
// Nonexported functions
// Check if library version is greater than or equal to the given one
func checkVersionAbove(major, minor, micro int) bool {
return (verMajor > major) ||
(verMajor == major && verMinor > minor) ||
(verMajor == major && verMinor == minor && verMicro >= micro)
}
// Init function: Verify library version is appropriate
func init() {
if !checkVersionAbove(2, 1, 0) {
fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
os.Exit(-1)
}
}
// Filter helpers
// Filter finalizer - ensure that kernel context for filters is freed
func filterFinalizer(f *ScmpFilter) {
f.Release()
}
// Get a raw filter attribute
func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return 0x0, errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
var attribute C.uint32_t
retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute)
if retCode != 0 {
return 0x0, syscall.Errno(-1 * retCode)
}
return attribute, nil
}
// Set a raw filter attribute
func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value)
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// DOES NOT LOCK OR CHECK VALIDITY
// Assumes caller has already done this
// Wrapper for seccomp_rule_add_... functions
func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
var length C.uint
if cond != nil {
length = 1
} else {
length = 0
}
var retCode C.int
if exact {
retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
} else {
retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
}
if syscall.Errno(-1*retCode) == syscall.EFAULT {
return fmt.Errorf("unrecognized syscall")
} else if syscall.Errno(-1*retCode) == syscall.EPERM {
return fmt.Errorf("requested action matches default action of filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Generic add function for filter rules
func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if len(conds) == 0 {
if err := f.addRuleWrapper(call, action, exact, nil); err != nil {
return err
}
} else {
// We don't support conditional filtering in library version v2.1
if !checkVersionAbove(2, 2, 1) {
return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
}
for _, cond := range conds {
cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
defer C.free(cmpStruct)
if err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct)); err != nil {
return err
}
}
}
return nil
}
// Generic Helpers
// Helper - Sanitize Arch token input
func sanitizeArch(in ScmpArch) error {
if in < archStart || in > archEnd {
return fmt.Errorf("unrecognized architecture")
}
if in.toNative() == C.C_ARCH_BAD {
return fmt.Errorf("architecture is not supported on this version of the library")
}
return nil
}
func sanitizeAction(in ScmpAction) error {
inTmp := in & 0x0000FFFF
if inTmp < actionStart || inTmp > actionEnd {
return fmt.Errorf("unrecognized action")
}
if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 {
return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
}
return nil
}
func sanitizeCompareOp(in ScmpCompareOp) error {
if in < compareOpStart || in > compareOpEnd {
return fmt.Errorf("unrecognized comparison operator")
}
return nil
}
func archFromNative(a C.uint32_t) (ScmpArch, error) {
switch a {
case C.C_ARCH_X86:
return ArchX86, nil
case C.C_ARCH_X86_64:
return ArchAMD64, nil
case C.C_ARCH_X32:
return ArchX32, nil
case C.C_ARCH_ARM:
return ArchARM, nil
case C.C_ARCH_NATIVE:
return ArchNative, nil
case C.C_ARCH_AARCH64:
return ArchARM64, nil
case C.C_ARCH_MIPS:
return ArchMIPS, nil
case C.C_ARCH_MIPS64:
return ArchMIPS64, nil
case C.C_ARCH_MIPS64N32:
return ArchMIPS64N32, nil
case C.C_ARCH_MIPSEL:
return ArchMIPSEL, nil
case C.C_ARCH_MIPSEL64:
return ArchMIPSEL64, nil
case C.C_ARCH_MIPSEL64N32:
return ArchMIPSEL64N32, nil
case C.C_ARCH_PPC:
return ArchPPC, nil
case C.C_ARCH_PPC64:
return ArchPPC64, nil
case C.C_ARCH_PPC64LE:
return ArchPPC64LE, nil
case C.C_ARCH_S390:
return ArchS390, nil
case C.C_ARCH_S390X:
return ArchS390X, nil
default:
return 0x0, fmt.Errorf("unrecognized architecture")
}
}
// Only use with sanitized arches, no error handling
func (a ScmpArch) toNative() C.uint32_t {
switch a {
case ArchX86:
return C.C_ARCH_X86
case ArchAMD64:
return C.C_ARCH_X86_64
case ArchX32:
return C.C_ARCH_X32
case ArchARM:
return C.C_ARCH_ARM
case ArchARM64:
return C.C_ARCH_AARCH64
case ArchMIPS:
return C.C_ARCH_MIPS
case ArchMIPS64:
return C.C_ARCH_MIPS64
case ArchMIPS64N32:
return C.C_ARCH_MIPS64N32
case ArchMIPSEL:
return C.C_ARCH_MIPSEL
case ArchMIPSEL64:
return C.C_ARCH_MIPSEL64
case ArchMIPSEL64N32:
return C.C_ARCH_MIPSEL64N32
case ArchPPC:
return C.C_ARCH_PPC
case ArchPPC64:
return C.C_ARCH_PPC64
case ArchPPC64LE:
return C.C_ARCH_PPC64LE
case ArchS390:
return C.C_ARCH_S390
case ArchS390X:
return C.C_ARCH_S390X
case ArchNative:
return C.C_ARCH_NATIVE
default:
return 0x0
}
}
// Only use with sanitized ops, no error handling
func (a ScmpCompareOp) toNative() C.int {
switch a {
case CompareNotEqual:
return C.C_CMP_NE
case CompareLess:
return C.C_CMP_LT
case CompareLessOrEqual:
return C.C_CMP_LE
case CompareEqual:
return C.C_CMP_EQ
case CompareGreaterEqual:
return C.C_CMP_GE
case CompareGreater:
return C.C_CMP_GT
case CompareMaskedEqual:
return C.C_CMP_MASKED_EQ
default:
return 0x0
}
}
func actionFromNative(a C.uint32_t) (ScmpAction, error) {
aTmp := a & 0xFFFF
switch a & 0xFFFF0000 {
case C.C_ACT_KILL:
return ActKill, nil
case C.C_ACT_TRAP:
return ActTrap, nil
case C.C_ACT_ERRNO:
return ActErrno.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_TRACE:
return ActTrace.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_ALLOW:
return ActAllow, nil
default:
return 0x0, fmt.Errorf("unrecognized action")
}
}
// Only use with sanitized actions, no error handling
func (a ScmpAction) toNative() C.uint32_t {
switch a & 0xFFFF {
case ActKill:
return C.C_ACT_KILL
case ActTrap:
return C.C_ACT_TRAP
case ActErrno:
return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16)
case ActTrace:
return C.C_ACT_TRACE | (C.uint32_t(a) >> 16)
case ActAllow:
return C.C_ACT_ALLOW
default:
return 0x0
}
}
// Internal only, assumes safe attribute
func (a scmpFilterAttr) toNative() uint32 {
switch a {
case filterAttrActDefault:
return uint32(C.C_ATTRIBUTE_DEFAULT)
case filterAttrActBadArch:
return uint32(C.C_ATTRIBUTE_BADARCH)
case filterAttrNNP:
return uint32(C.C_ATTRIBUTE_NNP)
case filterAttrTsync:
return uint32(C.C_ATTRIBUTE_TSYNC)
default:
return 0x0
}
}

View file

@ -1,19 +1,28 @@
Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
All rights reserved.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -327,3 +327,14 @@ func toASCII(s string) string {
}
return buf.String()
}
// isHeaderOnlyType checks if the given type flag is of the type that has no
// data section even if a size is specified.
func isHeaderOnlyType(flag byte) bool {
switch flag {
case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
return true
default:
return false
}
}

View file

@ -12,6 +12,7 @@ import (
"errors"
"io"
"io/ioutil"
"math"
"os"
"strconv"
"strings"
@ -39,6 +40,10 @@ type Reader struct {
rawBytes *bytes.Buffer // last raw bits
}
type parser struct {
err error // Last error seen
}
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
// This includes the header and padding.
//
@ -70,12 +75,36 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry
}
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file
pos int64 // keeps track of file position
tot int64 // total size of the file
rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // The sparse map for the file
pos int64 // Keeps track of file position
total int64 // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
}
// Keywords for GNU sparse files in a PAX extended header
@ -109,7 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
//
// io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) {
var hdr *Header
if tr.RawAccounting {
if tr.rawBytes == nil {
tr.rawBytes = bytes.NewBuffer(nil)
@ -117,98 +145,88 @@ func (tr *Reader) Next() (*Header, error) {
tr.rawBytes.Reset()
}
}
if tr.err == nil {
tr.skipUnread()
}
if tr.err != nil {
return hdr, tr.err
return nil, tr.err
}
hdr = tr.readHeader()
if hdr == nil {
return hdr, tr.err
}
// Check for PAX/GNU header.
switch hdr.Typeflag {
case TypeXHeader:
// PAX extended header
headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
var hdr *Header
var extHdrs map[string]string
// Externally, Next iterates through the tar archive as if it is a series of
// files. Internally, the tar format often uses fake "files" to add meta
// data that describes the next file. These meta data "files" should not
// normally be visible to the outside. As such, this loop iterates through
// one or more "header files" until it finds a "normal file".
loop:
for {
tr.err = tr.skipUnread()
if tr.err != nil {
return nil, tr.err
}
hdr = tr.readHeader()
if hdr == nil {
if tr.err != nil {
return nil, tr.err
}
mergePAX(hdr, headers)
// Check for PAX/GNU special headers and files.
switch hdr.Typeflag {
case TypeXHeader:
extHdrs, tr.err = parsePAX(tr)
if tr.err != nil {
return nil, tr.err
}
continue loop // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink:
var realname []byte
realname, tr.err = ioutil.ReadAll(tr)
if tr.err != nil {
return nil, tr.err
}
// Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
if err != nil {
tr.err = err
return nil, err
}
if sp != nil {
// Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
}
return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var buf []byte
if tr.RawAccounting {
if _, err = tr.rawBytes.Write(realname); err != nil {
if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
return nil, tr.err
}
}
// Convert GNU extensions to use PAX headers.
if extHdrs == nil {
extHdrs = make(map[string]string)
}
var p parser
switch hdr.Typeflag {
case TypeGNULongName:
extHdrs[paxPath] = p.parseString(realname)
case TypeGNULongLink:
extHdrs[paxLinkpath] = p.parseString(realname)
}
if p.err != nil {
tr.err = p.err
return nil, tr.err
}
continue loop // This is a meta header affecting the next header
default:
mergePAX(hdr, extHdrs)
// Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil {
tr.err = err
return nil, err
}
buf = make([]byte, tr.rawBytes.Len())
copy(buf[:], tr.RawBytes())
}
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
if sp != nil {
// Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader.
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
}
break loop // This is a file, so stop
}
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var buf []byte
if tr.RawAccounting {
if _, err = tr.rawBytes.Write(realname); err != nil {
return nil, err
}
buf = make([]byte, tr.rawBytes.Len())
copy(buf[:], tr.RawBytes())
}
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
}
}
hdr.Linkname = cString(realname)
return hdr, err
}
return hdr, tr.err
return hdr, nil
}
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
@ -385,6 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return nil, err
}
}
sbuf := string(buf)
// For GNU PAX sparse format 0.0 support.
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
@ -393,35 +412,17 @@ func parsePAX(r io.Reader) (map[string]string, error) {
headers := make(map[string]string)
// Each record is constructed as
// "%d %s=%s\n", length, keyword, value
for len(buf) > 0 {
// or the header was empty to start with.
var sp int
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
for len(sbuf) > 0 {
key, value, residual, err := parsePAXRecord(sbuf)
if err != nil {
return nil, ErrHeader
}
// Parse the first token as a decimal integer.
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil || n < 5 || int64(len(buf)) < n {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
sbuf = residual
keyStr := string(key)
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
sparseMap.Write(value)
sparseMap.WriteString(value)
sparseMap.Write([]byte{','})
} else {
// Normal key. Set the value in the headers map.
@ -436,9 +437,42 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return headers, nil
}
// cString parses bytes as a NUL-terminated C-style string.
// parsePAXRecord parses the input PAX record string into a key-value pair.
// If parsing is successful, it will slice off the currently read record and
// return the remainder as r.
//
// A PAX record is of the following form:
// "%d %s=%s\n" % (size, key, value)
func parsePAXRecord(s string) (k, v, r string, err error) {
// The size field ends at the first space.
sp := strings.IndexByte(s, ' ')
if sp == -1 {
return "", "", s, ErrHeader
}
// Parse the first token as a decimal integer.
n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
if perr != nil || n < 5 || int64(len(s)) < n {
return "", "", s, ErrHeader
}
// Extract everything between the space and the final newline.
rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
if nl != "\n" {
return "", "", s, ErrHeader
}
// The first equals separates the key from the value.
eq := strings.IndexByte(rec, '=')
if eq == -1 {
return "", "", s, ErrHeader
}
return rec[:eq], rec[eq+1:], rem, nil
}
// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
func (*parser) parseString(b []byte) string {
n := 0
for n < len(b) && b[n] != 0 {
n++
@ -446,19 +480,51 @@ func cString(b []byte) string {
return string(b[0:n])
}
func (tr *Reader) octal(b []byte) int64 {
// Check for binary format first.
// parseNumeric parses the input as being encoded in either base-256 or octal.
// This function may return negative numbers.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
// Check for base-256 (binary) format first.
// If the first bit is set, then all following bits constitute a two's
// complement encoded number in big-endian byte order.
if len(b) > 0 && b[0]&0x80 != 0 {
var x int64
for i, c := range b {
if i == 0 {
c &= 0x7f // ignore signal bit in first byte
}
x = x<<8 | int64(c)
// Handling negative numbers relies on the following identity:
// -a-1 == ^a
//
// If the number is negative, we use an inversion mask to invert the
// data bytes and treat the value as an unsigned number.
var inv byte // 0x00 if positive or zero, 0xff if negative
if b[0]&0x40 != 0 {
inv = 0xff
}
return x
var x uint64
for i, c := range b {
c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
if i == 0 {
c &= 0x7f // Ignore signal bit in first byte
}
if (x >> 56) > 0 {
p.err = ErrHeader // Integer overflow
return 0
}
x = x<<8 | uint64(c)
}
if (x >> 63) > 0 {
p.err = ErrHeader // Integer overflow
return 0
}
if inv == 0xff {
return ^int64(x)
}
return int64(x)
}
// Normal case is base-8 (octal) format.
return p.parseOctal(b)
}
func (p *parser) parseOctal(b []byte) int64 {
// Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with
// spaces or NULs.
@ -469,27 +535,55 @@ func (tr *Reader) octal(b []byte) int64 {
if len(b) == 0 {
return 0
}
x, err := strconv.ParseUint(cString(b), 8, 64)
if err != nil {
tr.err = err
x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
if perr != nil {
p.err = ErrHeader
}
return int64(x)
}
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.numBytes() + tr.pad // number of bytes to skip
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0
if tr.RawAccounting {
_, tr.err = io.CopyN(tr.rawBytes, tr.r, nr)
return
_, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
return tr.err
}
if sr, ok := tr.r.(io.Seeker); ok {
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
return
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, os.SEEK_CUR)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
if err != nil {
tr.err = err
return tr.err
}
seekSkipped = pos2 - pos1
}
}
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
var copySkipped int64 // Number of bytes skipped via CopyN
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
tr.err = io.ErrUnexpectedEOF
}
return tr.err
}
func (tr *Reader) verifyChecksum(header []byte) bool {
@ -497,23 +591,32 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
return false
}
given := tr.octal(header[148:156])
var p parser
given := p.parseOctal(header[148:156])
unsigned, signed := checksum(header)
return given == unsigned || given == signed
return p.err == nil && (given == unsigned || given == signed)
}
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:]
copy(header, zeroBlock)
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
if n, err := io.ReadFull(tr.r, header); err != nil {
tr.err = err
// because it could read some of the block, but reach EOF first
if tr.err == io.EOF && tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
return nil
if _, err := tr.rawBytes.Write(header[:n]); err != nil {
tr.err = err
}
}
return nil
return nil // io.EOF is okay here
}
if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -523,14 +626,15 @@ func (tr *Reader) readHeader() *Header {
// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
if n, err := io.ReadFull(tr.r, header); err != nil {
tr.err = err
// because it could read some of the block, but reach EOF first
if tr.err == io.EOF && tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
return nil
if _, err := tr.rawBytes.Write(header[:n]); err != nil {
tr.err = err
}
}
return nil
return nil // io.EOF is okay here
}
if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -551,22 +655,19 @@ func (tr *Reader) readHeader() *Header {
}
// Unpack
var p parser
hdr := new(Header)
s := slicer(header)
hdr.Name = cString(s.next(100))
hdr.Mode = tr.octal(s.next(8))
hdr.Uid = int(tr.octal(s.next(8)))
hdr.Gid = int(tr.octal(s.next(8)))
hdr.Size = tr.octal(s.next(12))
if hdr.Size < 0 {
tr.err = ErrHeader
return nil
}
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
hdr.Name = p.parseString(s.next(100))
hdr.Mode = p.parseNumeric(s.next(8))
hdr.Uid = int(p.parseNumeric(s.next(8)))
hdr.Gid = int(p.parseNumeric(s.next(8)))
hdr.Size = p.parseNumeric(s.next(12))
hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
s.next(8) // chksum
hdr.Typeflag = s.next(1)[0]
hdr.Linkname = cString(s.next(100))
hdr.Linkname = p.parseString(s.next(100))
// The remainder of the header depends on the value of magic.
// The original (v7) version of tar had no explicit magic field,
@ -586,70 +687,76 @@ func (tr *Reader) readHeader() *Header {
switch format {
case "posix", "gnu", "star":
hdr.Uname = cString(s.next(32))
hdr.Gname = cString(s.next(32))
hdr.Uname = p.parseString(s.next(32))
hdr.Gname = p.parseString(s.next(32))
devmajor := s.next(8)
devminor := s.next(8)
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = tr.octal(devmajor)
hdr.Devminor = tr.octal(devminor)
hdr.Devmajor = p.parseNumeric(devmajor)
hdr.Devminor = p.parseNumeric(devminor)
}
var prefix string
switch format {
case "posix", "gnu":
prefix = cString(s.next(155))
prefix = p.parseString(s.next(155))
case "star":
prefix = cString(s.next(131))
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
prefix = p.parseString(s.next(131))
hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name
}
}
if tr.err != nil {
if p.err != nil {
tr.err = p.err
return nil
}
nb := hdr.Size
if isHeaderOnlyType(hdr.Typeflag) {
nb = 0
}
if nb < 0 {
tr.err = ErrHeader
return nil
}
// Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
nb := int64(hdr.Size)
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
// Set the current file reader.
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
tr.curr = &regFileReader{r: tr.r, nb: nb}
// Check for old GNU sparse format entry.
if hdr.Typeflag == TypeGNUSparse {
// Get the real size of the file.
hdr.Size = tr.octal(header[483:495])
hdr.Size = p.parseNumeric(header[483:495])
if p.err != nil {
tr.err = p.err
return nil
}
// Read the sparse map.
sp := tr.readOldGNUSparseMap(header)
if tr.err != nil {
return nil
}
// Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil
}
}
return hdr
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
var p parser
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
spCap := oldGNUSparseMainHeaderNumEntries
if isExtended {
@ -660,10 +767,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
// Read the four entries from the main tar header
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil {
tr.err = ErrHeader
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
if p.err != nil {
tr.err = p.err
return nil
}
if offset == 0 && numBytes == 0 {
@ -687,10 +794,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
s = slicer(sparseHeader)
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil {
tr.err = ErrHeader
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
if p.err != nil {
tr.err = p.err
return nil
}
if offset == 0 && numBytes == 0 {
@ -702,134 +809,111 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
return sp
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
// The sparse map is stored just before the file data and padded out to the nearest block boundary.
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
buf := make([]byte, 2*blockSize)
sparseHeader := buf[:blockSize]
var cntNewline int64
var buf bytes.Buffer
var blk = make([]byte, blockSize)
// readDecimal is a helper function to read a decimal integer from the sparse map
// while making sure to read from the file in blocks of size blockSize
readDecimal := func() (int64, error) {
// Look for newline
nl := bytes.IndexByte(sparseHeader, '\n')
if nl == -1 {
if len(sparseHeader) >= blockSize {
// This is an error
return 0, ErrHeader
// feedTokens copies data in numBlock chunks from r into buf until there are
// at least cnt newlines in buf. It will not read more blocks than needed.
var feedTokens = func(cnt int64) error {
for cntNewline < cnt {
if _, err := io.ReadFull(r, blk); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
oldLen := len(sparseHeader)
newLen := oldLen + blockSize
if cap(sparseHeader) < newLen {
// There's more header, but we need to make room for the next block
copy(buf, sparseHeader)
sparseHeader = buf[:newLen]
} else {
// There's more header, and we can just reslice
sparseHeader = sparseHeader[:newLen]
}
// Now that sparseHeader is large enough, read next block
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
return 0, err
}
// leaving this function for io.Reader makes it more testable
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
return 0, err
buf.Write(blk)
for _, c := range blk {
if c == '\n' {
cntNewline++
}
}
// Look for a newline in the new data
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
if nl == -1 {
// This is an error
return 0, ErrHeader
}
nl += oldLen // We want the position from the beginning
}
// Now that we've found a newline, read a number
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
if err != nil {
return 0, ErrHeader
}
// Update sparseHeader to consume this number
sparseHeader = sparseHeader[nl+1:]
return n, nil
return nil
}
// Read the first block
if _, err := io.ReadFull(r, sparseHeader); err != nil {
// nextToken gets the next token delimited by a newline. This assumes that
// at least one newline exists in the buffer.
var nextToken = func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
return tok[:len(tok)-1] // Cut off newline
}
// Parse for the number of entries.
// Use integer overflow resistant math to check this.
if err := feedTokens(1); err != nil {
return nil, err
}
// leaving this function for io.Reader makes it more testable
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
if _, err := tr.rawBytes.Write(sparseHeader); err != nil {
return nil, err
}
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
}
// The first line contains the number of entries
numEntries, err := readDecimal()
if err != nil {
// Parse for all member entries.
// numEntries is trusted after this since a potential attacker must have
// committed resources proportional to what this library used.
if err := feedTokens(2 * numEntries); err != nil {
return nil, err
}
// Read all the entries
sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
// Read the offset
offset, err := readDecimal()
offset, err := strconv.ParseInt(nextToken(), 10, 64)
if err != nil {
return nil, err
return nil, ErrHeader
}
// Read numBytes
numBytes, err := readDecimal()
numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
if err != nil {
return nil, err
return nil, ErrHeader
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
}
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
// The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
// Get number of entries
numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
if !ok {
return nil, ErrHeader
}
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
if err != nil {
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
// Get number of entries.
// Use integer overflow resistant math to check this.
numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
}
sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
// There should be two numbers in sparseMap for each entry
// There should be two numbers in sparseMap for each entry.
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader
}
// Loop through the entries in the sparse map
// Loop through the entries in the sparse map.
// numEntries is trusted now.
sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
if err != nil {
return nil, ErrHeader
}
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
if err != nil {
return nil, ErrHeader
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
}
@ -846,10 +930,18 @@ func (tr *Reader) numBytes() int64 {
// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (n int, err error) {
if tr.err != nil {
return 0, tr.err
}
if tr.curr == nil {
return 0, io.EOF
}
n, err = tr.curr.Read(b)
if err != nil && err != io.EOF {
tr.err = err
@ -879,9 +971,33 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb
}
// readHole reads a sparse file hole ending at offset toOffset
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
n64 := toOffset - sfr.pos
// newSparseFileReader creates a new sparseFileReader, but validates all of the
// sparse entries before doing so.
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) {
n64 = int64(len(b))
}
@ -895,49 +1011,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
if len(sfr.sp) == 0 {
// No more data fragments to read from.
if sfr.pos < sfr.tot {
// We're in the last hole
n = sfr.readHole(b, sfr.tot)
return
}
// Otherwise, we're at the end of the file
return 0, io.EOF
}
if sfr.tot < sfr.sp[0].offset {
return 0, io.ErrUnexpectedEOF
}
if sfr.pos < sfr.sp[0].offset {
// We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
// Skip past all empty fragments.
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
sfr.sp = sfr.sp[1:]
}
// We're not in a hole, so we'll read from the next data fragment
posInFragment := sfr.pos - sfr.sp[0].offset
bytesLeft := sfr.sp[0].numBytes - posInFragment
// If there are no more fragments, then it is possible that there
// is one last sparse hole.
if len(sfr.sp) == 0 {
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet.
if sfr.pos < sfr.total {
return sfr.readHole(b, sfr.total), nil
}
return 0, io.EOF
}
// In front of a data fragment, so read a hole.
if sfr.pos < sfr.sp[0].offset {
return sfr.readHole(b, sfr.sp[0].offset), nil
}
// In a data fragment, so read from it.
// This math is overflow free since we verify that offset and numBytes can
// be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft]
b = b[:bytesLeft]
}
n, err = sfr.rfr.Read(b)
sfr.pos += int64(n)
if int64(n) == bytesLeft {
// We're done with this fragment
sfr.sp = sfr.sp[1:]
if err == io.EOF {
if sfr.pos < endPos {
err = io.ErrUnexpectedEOF // There was supposed to be more data
} else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
}
}
if err == io.EOF && sfr.pos < sfr.tot {
// We reached the end of the last fragment's data, but there's a final hole
err = nil
if sfr.pos == endPos {
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
}
return
return n, err
}
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb
return sfr.rfr.numBytes()
}

View file

@ -12,8 +12,8 @@ import (
"errors"
"fmt"
"io"
"os"
"path"
"sort"
"strconv"
"strings"
"time"
@ -23,7 +23,6 @@ var (
ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close")
errNameTooLong = errors.New("archive/tar: name too long")
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
)
@ -43,6 +42,10 @@ type Writer struct {
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
}
type formatter struct {
err error // Last error seen
}
// NewWriter creates a new Writer writing to w.
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
@ -69,17 +72,9 @@ func (tw *Writer) Flush() error {
}
// Write s into b, terminating it with a NUL if there is room.
// If the value is too long for the field and allowPax is true add a paxheader record instead
func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
func (f *formatter) formatString(b []byte, s string) {
if len(s) > len(b) {
if tw.err == nil {
tw.err = ErrFieldTooLong
}
f.err = ErrFieldTooLong
return
}
ascii := toASCII(s)
@ -90,40 +85,40 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string,
}
// Encode x as an octal ASCII string and write it into b with leading zeros.
func (tw *Writer) octal(b []byte, x int64) {
func (f *formatter) formatOctal(b []byte, x int64) {
s := strconv.FormatInt(x, 8)
// leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) {
s = "0" + s
}
tw.cString(b, s, false, paxNone, nil)
f.formatString(b, s)
}
// Write x into b, either as octal or as binary (GNUtar/star extension).
// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead
func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
tw.octal(b, x)
// fitsInBase256 reports whether x can be encoded into n bytes using base-256
// encoding. Unlike octal encoding, base-256 encoding does not require that the
// string ends with a NUL character. Thus, all n bytes are available for output.
//
// If operating in binary mode, this assumes strict GNU binary mode; which means
// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
// equivalent to the sign bit in two's complement form.
func fitsInBase256(n int, x int64) bool {
var binBits = uint(n-1) * 8
return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
}
// Write x into b, as binary (GNUtar/star extension).
func (f *formatter) formatNumeric(b []byte, x int64) {
if fitsInBase256(len(b), x) {
for i := len(b) - 1; i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // Highest bit indicates binary format
return
}
// If it is too long for octal, and pax is preferred, use a pax header
if allowPax && tw.preferPax {
tw.octal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
// Too big: use binary (big-endian).
tw.usedBinary = true
for i := len(b) - 1; x > 0 && i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // highest bit indicates binary format
f.formatOctal(b, 0) // Last resort, just write zero
f.err = ErrFieldTooLong
}
var (
@ -162,6 +157,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
var f formatter
var header []byte
// We need to select which scratch buffer to use carefully,
@ -176,10 +172,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
copy(header, zeroBlock)
s := slicer(header)
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
f.formatString(b, s)
}
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
f.formatOctal(b, x)
return
}
// If it is too long for octal, and PAX is preferred, use a PAX header.
if paxKeyword != paxNone && tw.preferPax {
f.formatOctal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
tw.usedBinary = true
f.formatNumeric(b, x)
}
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize)
tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)
formatString(pathHeaderBytes, hdr.Name, paxPath)
// Handle out of range ModTime carefully.
var modTime int64
@ -187,25 +213,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
modTime = hdr.ModTime.Unix()
}
tw.octal(s.next(8), hdr.Mode) // 100:108
tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136
tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157
f.formatOctal(s.next(8), hdr.Mode) // 100:108
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157
tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)
formatString(s.next(100), hdr.Linkname, paxLinkpath)
copy(s.next(8), []byte("ustar\x0000")) // 257:265
tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337
tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345
copy(s.next(8), []byte("ustar\x0000")) // 257:265
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155)
tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary {
@ -215,37 +241,26 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
_, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
suffix := hdr.Name
prefix := ""
if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
var err error
prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
if err == nil {
// ok we can use a ustar long name instead of pax, now correct the fields
prefix, suffix, ok := splitUSTARPath(hdr.Name)
if ok {
// Since we can encode in USTAR format, disable PAX header.
delete(paxHeaders, paxPath)
// remove the path field from the pax header. this will suppress the pax header
delete(paxHeaders, paxPath)
// update the path fields
tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
// Use the ustar magic if we used ustar long names.
if len(prefix) > 0 && !tw.usedBinary {
copy(header[257:265], []byte("ustar\x00"))
}
}
// Update the path fields
formatString(pathHeaderBytes, suffix, paxNone)
formatString(prefixHeaderBytes, prefix, paxNone)
}
}
// The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields.
chksum, _ := checksum(header)
tw.octal(header[148:155], chksum)
f.formatOctal(header[148:155], chksum) // Never fails
header[155] = ' '
if tw.err != nil {
// problem with header; probably integer too big for a field.
// Check if there were any formatting errors.
if f.err != nil {
tw.err = f.err
return tw.err
}
@ -270,28 +285,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
return tw.err
}
// writeUSTARLongName splits a USTAR long name hdr.Name.
// name must be < 256 characters. errNameTooLong is returned
// if hdr.Name can't be split. The splitting heuristic
// is compatible with gnu tar.
func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
length := len(name)
if length > fileNamePrefixSize+1 {
if length <= fileNameSize || !isASCII(name) {
return "", "", false
} else if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1
} else if name[length-1] == '/' {
length--
}
i := strings.LastIndex(name[:length], "/")
// nlen contains the resulting length in the name field.
// plen contains the resulting length in the prefix field.
nlen := len(name) - i - 1
plen := i
nlen := len(name) - i - 1 // nlen is length of suffix
plen := i // plen is length of prefix
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
err = errNameTooLong
return
return "", "", false
}
prefix, suffix = name[:i], name[i+1:]
return
return name[:i], name[i+1:], true
}
// writePaxHeader writes an extended pax header to the
@ -304,11 +316,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files
// with the current pid.
pid := os.Getpid()
// with the current pid. However, this results in differing outputs
// for identical inputs. As such, the constant 0 is now used instead.
// golang.org/issue/12358
dir, file := path.Split(hdr.Name)
fullName := path.Join(dir,
fmt.Sprintf("PaxHeaders.%d", pid), file)
fullName := path.Join(dir, "PaxHeaders.0", file)
ascii := toASCII(fullName)
if len(ascii) > 100 {
@ -318,8 +330,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// Construct the body
var buf bytes.Buffer
for k, v := range paxHeaders {
fmt.Fprint(&buf, paxHeader(k+"="+v))
// Keys are sorted before writing to body to allow deterministic output.
var keys []string
for k := range paxHeaders {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
}
ext.Size = int64(len(buf.Bytes()))
@ -335,17 +354,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
return nil
}
// paxHeader formats a single pax record, prefixing it with the appropriate length
func paxHeader(msg string) string {
const padding = 2 // Extra padding for space and newline
size := len(msg) + padding
// formatPAXRecord formats a single PAX record, prefixing it with the
// appropriate length.
func formatPAXRecord(k, v string) string {
const padding = 3 // Extra padding for ' ', '=', and '\n'
size := len(k) + len(v) + padding
size += len(strconv.Itoa(size))
record := fmt.Sprintf("%d %s\n", size, msg)
record := fmt.Sprintf("%d %s=%s\n", size, k, v)
// Final adjustment if adding size field increased the record size.
if len(record) != size {
// Final adjustment if adding size increased
// the number of digits in size
size = len(record)
record = fmt.Sprintf("%d %s\n", size, msg)
record = fmt.Sprintf("%d %s=%s\n", size, k, v)
}
return record
}