add seccomp support

Signed-off-by: Antonio Murdaca <runcom@redhat.com>
This commit is contained in:
Antonio Murdaca 2016-11-23 10:41:48 +01:00
parent 1bd0ba8516
commit 78ee03a8fc
No known key found for this signature in database
GPG key ID: B2BEAD150DE936B9
90 changed files with 4745 additions and 629 deletions

View file

@ -64,6 +64,10 @@ type RuntimeConfig struct {
// SELinux determines whether or not SELinux is used for pod separation.
SELinux bool `toml:"selinux"`
// SeccompProfile is the seccomp json profile path which is used as the
// default for the runtime.
SeccompProfile string `toml:"seccomp_profile"`
}
// ImageConfig represents the "ocid.image" TOML config table.

View file

@ -6,11 +6,13 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/kubernetes-incubator/cri-o/server/seccomp"
"github.com/kubernetes-incubator/cri-o/utils"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runtime-tools/generate"
@ -18,6 +20,12 @@ import (
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
const (
seccompUnconfined = "unconfined"
seccompRuntimeDefault = "runtime/default"
seccompLocalhostPrefix = "localhost/"
)
// CreateContainer creates a new container in specified PodSandbox
func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
logrus.Debugf("CreateContainerRequest %+v", req)
@ -73,7 +81,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
return nil, err
}
container, err := s.createSandboxContainer(containerID, containerName, sb, req.GetSandboxConfig(), containerDir, containerConfig)
container, err := s.createSandboxContainer(containerID, containerName, sb, containerDir, containerConfig)
if err != nil {
return nil, err
}
@ -101,7 +109,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
return resp, nil
}
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, SandboxConfig *pb.PodSandboxConfig, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
if sb == nil {
return nil, errors.New("createSandboxContainer needs a sandbox")
}
@ -282,6 +290,10 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
}
specgen.AddAnnotation("ocid/labels", string(labelsJSON))
if err = s.setupSeccomp(&specgen, containerName, sb.annotations); err != nil {
return nil, err
}
if err = specgen.SaveToFile(filepath.Join(containerDir, "config.json"), generate.ExportOptions{}); err != nil {
return nil, err
}
@ -310,6 +322,41 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
return container, nil
}
func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error {
profile, ok := sbAnnotations["security.alpha.kubernetes.io/seccomp/container/"+cname]
if !ok {
profile, ok = sbAnnotations["security.alpha.kubernetes.io/seccomp/pod"]
if !ok {
// running w/o seccomp, aka unconfined
profile = seccompUnconfined
}
}
if !s.seccompEnabled {
if profile != seccompUnconfined {
return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile")
}
logrus.Warn("seccomp is not enabled in your kernel, running container without profile")
}
if profile == seccompUnconfined {
// running w/o seccomp, aka unconfined
specgen.Spec().Linux.Seccomp = nil
return nil
}
if profile == seccompRuntimeDefault {
return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen)
}
if !strings.HasPrefix(profile, seccompLocalhostPrefix) {
return fmt.Errorf("unknown seccomp profile option: %q", profile)
}
//file, err := ioutil.ReadFile(filepath.Join(s.seccompProfileRoot, strings.TrimPrefix(profile, seccompLocalhostPrefix)))
//if err != nil {
//return err
//}
// TODO(runcom): setup from provided node's seccomp profile
// can't do this yet, see https://issues.k8s.io/36997
return nil
}
func (s *Server) generateContainerIDandName(podName string, name string, attempt uint32) (string, string, error) {
var (
err error

147
server/seccomp/seccomp.go Normal file
View file

@ -0,0 +1,147 @@
package seccomp
import (
"encoding/json"
"errors"
"fmt"
"github.com/docker/docker/pkg/stringutils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
libseccomp "github.com/seccomp/libseccomp-golang"
)
// LoadProfileFromStruct takes a Seccomp struct and setup seccomp in the spec.
func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error {
return setupSeccomp(&config, specgen)
}
// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile.
func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error {
var config Seccomp
if err := json.Unmarshal(body, &config); err != nil {
return fmt.Errorf("decoding seccomp profile failed: %v", err)
}
return setupSeccomp(&config, specgen)
}
var nativeToSeccomp = map[string]Arch{
"amd64": ArchX86_64,
"arm64": ArchAARCH64,
"mips64": ArchMIPS64,
"mips64n32": ArchMIPS64N32,
"mipsel64": ArchMIPSEL64,
"mipsel64n32": ArchMIPSEL64N32,
"s390x": ArchS390X,
}
func setupSeccomp(config *Seccomp, specgen *generate.Generator) error {
if config == nil {
return nil
}
// No default action specified, no syscalls listed, assume seccomp disabled
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
return nil
}
var arch string
var native, err = libseccomp.GetNativeArch()
if err == nil {
arch = native.String()
}
if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
}
customspec := specgen.Spec()
customspec.Linux.Seccomp = &specs.Seccomp{}
// if config.Architectures == 0 then libseccomp will figure out the architecture to use
if len(config.Architectures) != 0 {
for _, a := range config.Architectures {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a))
}
}
if len(config.ArchMap) != 0 {
for _, a := range config.ArchMap {
seccompArch, ok := nativeToSeccomp[arch]
if ok {
if a.Arch == seccompArch {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch))
for _, sa := range a.SubArches {
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa))
}
break
}
}
}
}
customspec.Linux.Seccomp.DefaultAction = specs.Action(config.DefaultAction)
Loop:
// Loop through all syscall blocks and convert them to libcontainer format after filtering them
for _, call := range config.Syscalls {
if len(call.Excludes.Arches) > 0 {
if stringutils.InSlice(call.Excludes.Arches, arch) {
continue Loop
}
}
if len(call.Excludes.Caps) > 0 {
for _, c := range call.Excludes.Caps {
if stringutils.InSlice(customspec.Process.Capabilities, c) {
continue Loop
}
}
}
if len(call.Includes.Arches) > 0 {
if !stringutils.InSlice(call.Includes.Arches, arch) {
continue Loop
}
}
if len(call.Includes.Caps) > 0 {
for _, c := range call.Includes.Caps {
if !stringutils.InSlice(customspec.Process.Capabilities, c) {
continue Loop
}
}
}
if call.Name != "" && len(call.Names) != 0 {
return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
}
if call.Name != "" {
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
}
for _, n := range call.Names {
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
}
}
return nil
}
func createSpecsSyscall(name string, action Action, args []*Arg) specs.Syscall {
newCall := specs.Syscall{
Name: name,
Action: specs.Action(action),
}
// Loop through all the arguments of the syscall and convert them
for _, arg := range args {
newArg := specs.Arg{
Index: arg.Index,
Value: arg.Value,
ValueTwo: arg.ValueTwo,
Op: specs.Operator(arg.Op),
}
newCall.Args = append(newCall.Args, newArg)
}
return newCall
}

93
server/seccomp/types.go Normal file
View file

@ -0,0 +1,93 @@
package seccomp
// Seccomp represents the config for a seccomp profile for syscall restriction.
type Seccomp struct {
DefaultAction Action `json:"defaultAction"`
// Architectures is kept to maintain backward compatibility with the old
// seccomp profile.
Architectures []Arch `json:"architectures,omitempty"`
ArchMap []Architecture `json:"archMap,omitempty"`
Syscalls []*Syscall `json:"syscalls"`
}
// Architecture is used to represent an specific architecture
// and its sub-architectures
type Architecture struct {
Arch Arch `json:"architecture"`
SubArches []Arch `json:"subArchitectures"`
}
// Arch used for architectures
type Arch string
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
const (
ArchX86 Arch = "SCMP_ARCH_X86"
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
ArchX32 Arch = "SCMP_ARCH_X32"
ArchARM Arch = "SCMP_ARCH_ARM"
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
ArchMIPS Arch = "SCMP_ARCH_MIPS"
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
ArchPPC Arch = "SCMP_ARCH_PPC"
ArchPPC64 Arch = "SCMP_ARCH_PPC64"
ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE"
ArchS390 Arch = "SCMP_ARCH_S390"
ArchS390X Arch = "SCMP_ARCH_S390X"
)
// Action taken upon Seccomp rule match
type Action string
// Define actions for Seccomp rules
const (
ActKill Action = "SCMP_ACT_KILL"
ActTrap Action = "SCMP_ACT_TRAP"
ActErrno Action = "SCMP_ACT_ERRNO"
ActTrace Action = "SCMP_ACT_TRACE"
ActAllow Action = "SCMP_ACT_ALLOW"
)
// Operator used to match syscall arguments in Seccomp
type Operator string
// Define operators for syscall arguments in Seccomp
const (
OpNotEqual Operator = "SCMP_CMP_NE"
OpLessThan Operator = "SCMP_CMP_LT"
OpLessEqual Operator = "SCMP_CMP_LE"
OpEqualTo Operator = "SCMP_CMP_EQ"
OpGreaterEqual Operator = "SCMP_CMP_GE"
OpGreaterThan Operator = "SCMP_CMP_GT"
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
)
// Arg used for matching specific syscall arguments in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"valueTwo"`
Op Operator `json:"op"`
}
// Filter is used to conditionally apply Seccomp rules
type Filter struct {
Caps []string `json:"caps,omitempty"`
Arches []string `json:"arches,omitempty"`
}
// Syscall is used to match a group of syscalls in Seccomp
type Syscall struct {
Name string `json:"name,omitempty"`
Names []string `json:"names,omitempty"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
Comment string `json:"comment"`
Includes Filter `json:"includes"`
Excludes Filter `json:"excludes"`
}

View file

@ -7,11 +7,13 @@ import (
"os"
"path/filepath"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/registrar"
"github.com/docker/docker/pkg/truncindex"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/kubernetes-incubator/cri-o/server/seccomp"
"github.com/kubernetes-incubator/cri-o/utils"
"github.com/opencontainers/runc/libcontainer/label"
rspec "github.com/opencontainers/runtime-spec/specs-go"
@ -34,6 +36,9 @@ type Server struct {
podIDIndex *truncindex.TruncIndex
ctrNameIndex *registrar.Registrar
ctrIDIndex *truncindex.TruncIndex
seccompEnabled bool
seccompProfile seccomp.Seccomp
}
func (s *Server) loadContainer(id string) error {
@ -223,6 +228,23 @@ func (s *Server) releaseContainerName(name string) {
s.ctrNameIndex.Release(name)
}
const (
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
)
func seccompEnabled() bool {
var enabled bool
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
enabled = true
}
}
return enabled
}
// New creates a new Server with options provided
func New(config *Config) (*Server, error) {
// TODO: This will go away later when we have wrapper process or systemd acting as
@ -259,7 +281,17 @@ func New(config *Config) (*Server, error) {
sandboxes: sandboxes,
containers: containers,
},
seccompEnabled: seccompEnabled(),
}
seccompProfile, err := ioutil.ReadFile(config.SeccompProfile)
if err != nil {
return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err)
}
var seccompConfig seccomp.Seccomp
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
return nil, fmt.Errorf("decoding seccomp profile failed: %v", err)
}
s.seccompProfile = seccompConfig
s.podIDIndex = truncindex.NewTruncIndex([]string{})
s.podNameIndex = registrar.NewRegistrar()