add seccomp support
Signed-off-by: Antonio Murdaca <runcom@redhat.com>
This commit is contained in:
parent
1bd0ba8516
commit
78ee03a8fc
90 changed files with 4745 additions and 629 deletions
|
@ -64,6 +64,10 @@ type RuntimeConfig struct {
|
|||
|
||||
// SELinux determines whether or not SELinux is used for pod separation.
|
||||
SELinux bool `toml:"selinux"`
|
||||
|
||||
// SeccompProfile is the seccomp json profile path which is used as the
|
||||
// default for the runtime.
|
||||
SeccompProfile string `toml:"seccomp_profile"`
|
||||
}
|
||||
|
||||
// ImageConfig represents the "ocid.image" TOML config table.
|
||||
|
|
|
@ -6,11 +6,13 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/kubernetes-incubator/cri-o/oci"
|
||||
"github.com/kubernetes-incubator/cri-o/server/seccomp"
|
||||
"github.com/kubernetes-incubator/cri-o/utils"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runtime-tools/generate"
|
||||
|
@ -18,6 +20,12 @@ import (
|
|||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||
)
|
||||
|
||||
const (
|
||||
seccompUnconfined = "unconfined"
|
||||
seccompRuntimeDefault = "runtime/default"
|
||||
seccompLocalhostPrefix = "localhost/"
|
||||
)
|
||||
|
||||
// CreateContainer creates a new container in specified PodSandbox
|
||||
func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
|
||||
logrus.Debugf("CreateContainerRequest %+v", req)
|
||||
|
@ -73,7 +81,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
|
|||
return nil, err
|
||||
}
|
||||
|
||||
container, err := s.createSandboxContainer(containerID, containerName, sb, req.GetSandboxConfig(), containerDir, containerConfig)
|
||||
container, err := s.createSandboxContainer(containerID, containerName, sb, containerDir, containerConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -101,7 +109,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
|
|||
return resp, nil
|
||||
}
|
||||
|
||||
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, SandboxConfig *pb.PodSandboxConfig, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
|
||||
func (s *Server) createSandboxContainer(containerID string, containerName string, sb *sandbox, containerDir string, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
|
||||
if sb == nil {
|
||||
return nil, errors.New("createSandboxContainer needs a sandbox")
|
||||
}
|
||||
|
@ -282,6 +290,10 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
|||
}
|
||||
specgen.AddAnnotation("ocid/labels", string(labelsJSON))
|
||||
|
||||
if err = s.setupSeccomp(&specgen, containerName, sb.annotations); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = specgen.SaveToFile(filepath.Join(containerDir, "config.json"), generate.ExportOptions{}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -310,6 +322,41 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
|||
return container, nil
|
||||
}
|
||||
|
||||
func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error {
|
||||
profile, ok := sbAnnotations["security.alpha.kubernetes.io/seccomp/container/"+cname]
|
||||
if !ok {
|
||||
profile, ok = sbAnnotations["security.alpha.kubernetes.io/seccomp/pod"]
|
||||
if !ok {
|
||||
// running w/o seccomp, aka unconfined
|
||||
profile = seccompUnconfined
|
||||
}
|
||||
}
|
||||
if !s.seccompEnabled {
|
||||
if profile != seccompUnconfined {
|
||||
return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile")
|
||||
}
|
||||
logrus.Warn("seccomp is not enabled in your kernel, running container without profile")
|
||||
}
|
||||
if profile == seccompUnconfined {
|
||||
// running w/o seccomp, aka unconfined
|
||||
specgen.Spec().Linux.Seccomp = nil
|
||||
return nil
|
||||
}
|
||||
if profile == seccompRuntimeDefault {
|
||||
return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen)
|
||||
}
|
||||
if !strings.HasPrefix(profile, seccompLocalhostPrefix) {
|
||||
return fmt.Errorf("unknown seccomp profile option: %q", profile)
|
||||
}
|
||||
//file, err := ioutil.ReadFile(filepath.Join(s.seccompProfileRoot, strings.TrimPrefix(profile, seccompLocalhostPrefix)))
|
||||
//if err != nil {
|
||||
//return err
|
||||
//}
|
||||
// TODO(runcom): setup from provided node's seccomp profile
|
||||
// can't do this yet, see https://issues.k8s.io/36997
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) generateContainerIDandName(podName string, name string, attempt uint32) (string, string, error) {
|
||||
var (
|
||||
err error
|
||||
|
|
147
server/seccomp/seccomp.go
Normal file
147
server/seccomp/seccomp.go
Normal file
|
@ -0,0 +1,147 @@
|
|||
package seccomp
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/docker/docker/pkg/stringutils"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/runtime-tools/generate"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
)
|
||||
|
||||
// LoadProfileFromStruct takes a Seccomp struct and setup seccomp in the spec.
|
||||
func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error {
|
||||
return setupSeccomp(&config, specgen)
|
||||
}
|
||||
|
||||
// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile.
|
||||
func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error {
|
||||
var config Seccomp
|
||||
if err := json.Unmarshal(body, &config); err != nil {
|
||||
return fmt.Errorf("decoding seccomp profile failed: %v", err)
|
||||
}
|
||||
return setupSeccomp(&config, specgen)
|
||||
}
|
||||
|
||||
var nativeToSeccomp = map[string]Arch{
|
||||
"amd64": ArchX86_64,
|
||||
"arm64": ArchAARCH64,
|
||||
"mips64": ArchMIPS64,
|
||||
"mips64n32": ArchMIPS64N32,
|
||||
"mipsel64": ArchMIPSEL64,
|
||||
"mipsel64n32": ArchMIPSEL64N32,
|
||||
"s390x": ArchS390X,
|
||||
}
|
||||
|
||||
func setupSeccomp(config *Seccomp, specgen *generate.Generator) error {
|
||||
if config == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// No default action specified, no syscalls listed, assume seccomp disabled
|
||||
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var arch string
|
||||
var native, err = libseccomp.GetNativeArch()
|
||||
if err == nil {
|
||||
arch = native.String()
|
||||
}
|
||||
|
||||
if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
|
||||
return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
|
||||
}
|
||||
|
||||
customspec := specgen.Spec()
|
||||
customspec.Linux.Seccomp = &specs.Seccomp{}
|
||||
|
||||
// if config.Architectures == 0 then libseccomp will figure out the architecture to use
|
||||
if len(config.Architectures) != 0 {
|
||||
for _, a := range config.Architectures {
|
||||
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a))
|
||||
}
|
||||
}
|
||||
|
||||
if len(config.ArchMap) != 0 {
|
||||
for _, a := range config.ArchMap {
|
||||
seccompArch, ok := nativeToSeccomp[arch]
|
||||
if ok {
|
||||
if a.Arch == seccompArch {
|
||||
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch))
|
||||
for _, sa := range a.SubArches {
|
||||
customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa))
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
customspec.Linux.Seccomp.DefaultAction = specs.Action(config.DefaultAction)
|
||||
|
||||
Loop:
|
||||
// Loop through all syscall blocks and convert them to libcontainer format after filtering them
|
||||
for _, call := range config.Syscalls {
|
||||
if len(call.Excludes.Arches) > 0 {
|
||||
if stringutils.InSlice(call.Excludes.Arches, arch) {
|
||||
continue Loop
|
||||
}
|
||||
}
|
||||
if len(call.Excludes.Caps) > 0 {
|
||||
for _, c := range call.Excludes.Caps {
|
||||
if stringutils.InSlice(customspec.Process.Capabilities, c) {
|
||||
continue Loop
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(call.Includes.Arches) > 0 {
|
||||
if !stringutils.InSlice(call.Includes.Arches, arch) {
|
||||
continue Loop
|
||||
}
|
||||
}
|
||||
if len(call.Includes.Caps) > 0 {
|
||||
for _, c := range call.Includes.Caps {
|
||||
if !stringutils.InSlice(customspec.Process.Capabilities, c) {
|
||||
continue Loop
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if call.Name != "" && len(call.Names) != 0 {
|
||||
return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
|
||||
}
|
||||
|
||||
if call.Name != "" {
|
||||
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
|
||||
}
|
||||
|
||||
for _, n := range call.Names {
|
||||
customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createSpecsSyscall(name string, action Action, args []*Arg) specs.Syscall {
|
||||
newCall := specs.Syscall{
|
||||
Name: name,
|
||||
Action: specs.Action(action),
|
||||
}
|
||||
|
||||
// Loop through all the arguments of the syscall and convert them
|
||||
for _, arg := range args {
|
||||
newArg := specs.Arg{
|
||||
Index: arg.Index,
|
||||
Value: arg.Value,
|
||||
ValueTwo: arg.ValueTwo,
|
||||
Op: specs.Operator(arg.Op),
|
||||
}
|
||||
|
||||
newCall.Args = append(newCall.Args, newArg)
|
||||
}
|
||||
return newCall
|
||||
}
|
93
server/seccomp/types.go
Normal file
93
server/seccomp/types.go
Normal file
|
@ -0,0 +1,93 @@
|
|||
package seccomp
|
||||
|
||||
// Seccomp represents the config for a seccomp profile for syscall restriction.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"defaultAction"`
|
||||
// Architectures is kept to maintain backward compatibility with the old
|
||||
// seccomp profile.
|
||||
Architectures []Arch `json:"architectures,omitempty"`
|
||||
ArchMap []Architecture `json:"archMap,omitempty"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
}
|
||||
|
||||
// Architecture is used to represent an specific architecture
|
||||
// and its sub-architectures
|
||||
type Architecture struct {
|
||||
Arch Arch `json:"architecture"`
|
||||
SubArches []Arch `json:"subArchitectures"`
|
||||
}
|
||||
|
||||
// Arch used for architectures
|
||||
type Arch string
|
||||
|
||||
// Additional architectures permitted to be used for system calls
|
||||
// By default only the native architecture of the kernel is permitted
|
||||
const (
|
||||
ArchX86 Arch = "SCMP_ARCH_X86"
|
||||
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
|
||||
ArchX32 Arch = "SCMP_ARCH_X32"
|
||||
ArchARM Arch = "SCMP_ARCH_ARM"
|
||||
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
|
||||
ArchMIPS Arch = "SCMP_ARCH_MIPS"
|
||||
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
|
||||
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
|
||||
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
|
||||
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
|
||||
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
|
||||
ArchPPC Arch = "SCMP_ARCH_PPC"
|
||||
ArchPPC64 Arch = "SCMP_ARCH_PPC64"
|
||||
ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE"
|
||||
ArchS390 Arch = "SCMP_ARCH_S390"
|
||||
ArchS390X Arch = "SCMP_ARCH_S390X"
|
||||
)
|
||||
|
||||
// Action taken upon Seccomp rule match
|
||||
type Action string
|
||||
|
||||
// Define actions for Seccomp rules
|
||||
const (
|
||||
ActKill Action = "SCMP_ACT_KILL"
|
||||
ActTrap Action = "SCMP_ACT_TRAP"
|
||||
ActErrno Action = "SCMP_ACT_ERRNO"
|
||||
ActTrace Action = "SCMP_ACT_TRACE"
|
||||
ActAllow Action = "SCMP_ACT_ALLOW"
|
||||
)
|
||||
|
||||
// Operator used to match syscall arguments in Seccomp
|
||||
type Operator string
|
||||
|
||||
// Define operators for syscall arguments in Seccomp
|
||||
const (
|
||||
OpNotEqual Operator = "SCMP_CMP_NE"
|
||||
OpLessThan Operator = "SCMP_CMP_LT"
|
||||
OpLessEqual Operator = "SCMP_CMP_LE"
|
||||
OpEqualTo Operator = "SCMP_CMP_EQ"
|
||||
OpGreaterEqual Operator = "SCMP_CMP_GE"
|
||||
OpGreaterThan Operator = "SCMP_CMP_GT"
|
||||
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
|
||||
)
|
||||
|
||||
// Arg used for matching specific syscall arguments in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"valueTwo"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Filter is used to conditionally apply Seccomp rules
|
||||
type Filter struct {
|
||||
Caps []string `json:"caps,omitempty"`
|
||||
Arches []string `json:"arches,omitempty"`
|
||||
}
|
||||
|
||||
// Syscall is used to match a group of syscalls in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name,omitempty"`
|
||||
Names []string `json:"names,omitempty"`
|
||||
Action Action `json:"action"`
|
||||
Args []*Arg `json:"args"`
|
||||
Comment string `json:"comment"`
|
||||
Includes Filter `json:"includes"`
|
||||
Excludes Filter `json:"excludes"`
|
||||
}
|
|
@ -7,11 +7,13 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/pkg/registrar"
|
||||
"github.com/docker/docker/pkg/truncindex"
|
||||
"github.com/kubernetes-incubator/cri-o/oci"
|
||||
"github.com/kubernetes-incubator/cri-o/server/seccomp"
|
||||
"github.com/kubernetes-incubator/cri-o/utils"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
rspec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
@ -34,6 +36,9 @@ type Server struct {
|
|||
podIDIndex *truncindex.TruncIndex
|
||||
ctrNameIndex *registrar.Registrar
|
||||
ctrIDIndex *truncindex.TruncIndex
|
||||
|
||||
seccompEnabled bool
|
||||
seccompProfile seccomp.Seccomp
|
||||
}
|
||||
|
||||
func (s *Server) loadContainer(id string) error {
|
||||
|
@ -223,6 +228,23 @@ func (s *Server) releaseContainerName(name string) {
|
|||
s.ctrNameIndex.Release(name)
|
||||
}
|
||||
|
||||
const (
|
||||
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
|
||||
SeccompModeFilter = uintptr(2)
|
||||
)
|
||||
|
||||
func seccompEnabled() bool {
|
||||
var enabled bool
|
||||
// Check if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
|
||||
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
|
||||
enabled = true
|
||||
}
|
||||
}
|
||||
return enabled
|
||||
}
|
||||
|
||||
// New creates a new Server with options provided
|
||||
func New(config *Config) (*Server, error) {
|
||||
// TODO: This will go away later when we have wrapper process or systemd acting as
|
||||
|
@ -259,7 +281,17 @@ func New(config *Config) (*Server, error) {
|
|||
sandboxes: sandboxes,
|
||||
containers: containers,
|
||||
},
|
||||
seccompEnabled: seccompEnabled(),
|
||||
}
|
||||
seccompProfile, err := ioutil.ReadFile(config.SeccompProfile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err)
|
||||
}
|
||||
var seccompConfig seccomp.Seccomp
|
||||
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
|
||||
return nil, fmt.Errorf("decoding seccomp profile failed: %v", err)
|
||||
}
|
||||
s.seccompProfile = seccompConfig
|
||||
|
||||
s.podIDIndex = truncindex.NewTruncIndex([]string{})
|
||||
s.podNameIndex = registrar.NewRegistrar()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue