cri-o/server/server.go
Samuel Ortiz 0df8200e12
sandbox: Create a symbolic link to the networking namespace
In order to work around a bug introduced with runc commit bc84f833,
we create a symbolic link to our permanent networking namespace so
that runc realizes that this is not the host namespace.

Although this bug is now fixed upstream (see commit f33de5ab4), this
patch keeps pre-rc3 runc versions working.
We may want to revert this patch once runc 1.0.0 is released.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
2016-12-12 19:48:23 +01:00
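
As described above, the fix is simply to create a symbolic link to the sandbox's permanent network namespace and hand that link to the runtime instead of the raw namespace path. A minimal, hypothetical sketch of that pattern follows; the helper name symlinkNetNS, the example paths, and the error handling are illustrative assumptions, not the actual cri-o code.

// Illustrative sketch only; not the cri-o implementation.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// symlinkNetNS creates <sandboxDir>/netns pointing at nsPath (for example a
// namespace bind-mounted under /var/run/netns/<name>) and returns the symlink
// path, which is what would go into the runtime spec instead of the raw
// namespace path. Both arguments and the link name are assumptions made for
// the sake of illustration.
func symlinkNetNS(sandboxDir, nsPath string) (string, error) {
	link := filepath.Join(sandboxDir, "netns")
	if err := os.Symlink(nsPath, link); err != nil {
		return "", fmt.Errorf("symlinking %s to %s: %v", nsPath, link, err)
	}
	return link, nil
}

func main() {
	link, err := symlinkNetNS("/var/run/sandbox-example", "/var/run/netns/cni-example")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("network namespace path for the spec:", link)
}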

package server

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"sync"
	"syscall"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/pkg/registrar"
	"github.com/docker/docker/pkg/truncindex"
	"github.com/kubernetes-incubator/cri-o/oci"
	"github.com/kubernetes-incubator/cri-o/server/apparmor"
	"github.com/kubernetes-incubator/cri-o/server/seccomp"
	"github.com/kubernetes-incubator/cri-o/utils"
	"github.com/opencontainers/runc/libcontainer/label"
	rspec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/rajatchopra/ocicni"
	pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)

const (
runtimeAPIVersion = "v1alpha1"
)
// Server implements the RuntimeService and ImageService
type Server struct {
config Config
runtime *oci.Runtime
stateLock sync.Mutex
state *serverState
netPlugin ocicni.CNIPlugin
podNameIndex *registrar.Registrar
podIDIndex *truncindex.TruncIndex
ctrNameIndex *registrar.Registrar
ctrIDIndex *truncindex.TruncIndex
seccompEnabled bool
seccompProfile seccomp.Seccomp
appArmorEnabled bool
appArmorProfile string
}
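// loadContainer reads a container's config.json from the on-disk container
// directory, reserves its name, and registers the resulting container with
// the server state and the container ID index.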
func (s *Server) loadContainer(id string) error {
config, err := ioutil.ReadFile(filepath.Join(s.runtime.ContainerDir(), id, "config.json"))
if err != nil {
return err
}
var m rspec.Spec
if err = json.Unmarshal(config, &m); err != nil {
return err
}
labels := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil {
return err
}
name := m.Annotations["ocid/name"]
name, err = s.reserveContainerName(id, name)
if err != nil {
return err
}
var metadata pb.ContainerMetadata
if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil {
return err
}
sb := s.getSandbox(m.Annotations["ocid/sandbox_id"])
if sb == nil {
logrus.Warnf("could not get sandbox with id %s, skipping", m.Annotations["ocid/sandbox_id"])
return nil
}
var tty bool
if v := m.Annotations["ocid/tty"]; v == "true" {
tty = true
}
containerPath := filepath.Join(s.runtime.ContainerDir(), id)
var img *pb.ImageSpec
image, ok := m.Annotations["ocid/image"]
if ok {
img = &pb.ImageSpec{
Image: &image,
}
}
annotations := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil {
return err
}
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
if err != nil {
return err
}
s.addContainer(ctr)
if err = s.runtime.UpdateStatus(ctr); err != nil {
logrus.Warnf("error updating status for container %s: %v", ctr.ID(), err)
}
if err = s.ctrIDIndex.Add(id); err != nil {
return err
}
return nil
}
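// configNetNsPath returns the path of the network namespace declared in the
// given runtime spec, or an error if none is declared or the path is empty.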
func configNetNsPath(spec rspec.Spec) (string, error) {
for _, ns := range spec.Linux.Namespaces {
if ns.Type != rspec.NetworkNamespace {
continue
}
if ns.Path == "" {
return "", fmt.Errorf("empty networking namespace")
}
return ns.Path, nil
}
return "", fmt.Errorf("missing networking namespace")
}
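// loadSandbox reconstructs a sandbox from its on-disk config.json, restores
// its network namespace and infra container, and registers it with the
// server state.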
func (s *Server) loadSandbox(id string) error {
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
if err != nil {
return err
}
var m rspec.Spec
if err = json.Unmarshal(config, &m); err != nil {
return err
}
labels := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil {
return err
}
name := m.Annotations["ocid/name"]
name, err = s.reservePodName(id, name)
if err != nil {
return err
}
var metadata pb.PodSandboxMetadata
if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil {
return err
}
processLabel, mountLabel, err := label.InitLabels(label.DupSecOpt(m.Process.SelinuxLabel))
if err != nil {
return err
}
annotations := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil {
return err
}
sb := &sandbox{
id: id,
name: name,
logDir: m.Annotations["ocid/log_path"],
labels: labels,
containers: oci.NewMemoryStore(),
processLabel: processLabel,
mountLabel: mountLabel,
annotations: annotations,
metadata: &metadata,
shmPath: m.Annotations["ocid/shm_path"],
}
// We add a netNS only if we can load a permanent one.
// Otherwise, the sandbox will live in the host namespace.
netNsPath, err := configNetNsPath(m)
if err == nil {
netNS, nsErr := netNsGet(netNsPath, sb.name)
// If we can't load the networking namespace
// because it's closed, we just set the sb netns
// pointer to nil. Otherwise we return an error.
if nsErr != nil && nsErr != errSandboxClosedNetNS {
return nsErr
}
sb.netns = netNS
}
s.addSandbox(sb)
sandboxPath := filepath.Join(s.config.SandboxDir, id)
if err = label.ReserveLabel(processLabel); err != nil {
return err
}
cname, err := s.reserveContainerName(m.Annotations["ocid/container_id"], m.Annotations["ocid/container_name"])
if err != nil {
return err
}
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil {
return err
}
sb.infraContainer = scontainer
if err = s.runtime.UpdateStatus(scontainer); err != nil {
logrus.Warnf("error updating status for container %s: %v", scontainer.ID(), err)
}
if err = s.ctrIDIndex.Add(scontainer.ID()); err != nil {
return err
}
if err = s.podIDIndex.Add(id); err != nil {
return err
}
return nil
}
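// restore walks the sandbox and container directories on disk and reloads
// any state left over from a previous run.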
func (s *Server) restore() {
sandboxDir, err := ioutil.ReadDir(s.config.SandboxDir)
if err != nil && !os.IsNotExist(err) {
logrus.Warnf("could not read sandbox directory %s: %v", sandboxDir, err)
}
for _, v := range sandboxDir {
if !v.IsDir() {
continue
}
if err = s.loadSandbox(v.Name()); err != nil {
logrus.Warnf("could not restore sandbox %s: %v", v.Name(), err)
}
}
containerDir, err := ioutil.ReadDir(s.runtime.ContainerDir())
if err != nil && !os.IsNotExist(err) {
logrus.Warnf("could not read container directory %s: %v", s.runtime.ContainerDir(), err)
}
for _, v := range containerDir {
if !v.IsDir() {
continue
}
if err := s.loadContainer(v.Name()); err != nil {
logrus.Warnf("could not restore container %s: %v", v.Name(), err)
}
}
}
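// reservePodName reserves name for the pod with the given id, returning an
// error if the name is already reserved for another pod.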
func (s *Server) reservePodName(id, name string) (string, error) {
if err := s.podNameIndex.Reserve(name, id); err != nil {
if err == registrar.ErrNameReserved {
id, err := s.podNameIndex.Get(name)
if err != nil {
logrus.Warnf("conflict, pod name %q already reserved", name)
return "", err
}
return "", fmt.Errorf("conflict, name %q already reserved for pod %q", name, id)
}
return "", fmt.Errorf("error reserving pod name %q", name)
}
return name, nil
}
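// releasePodName releases a previously reserved pod name.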
func (s *Server) releasePodName(name string) {
s.podNameIndex.Release(name)
}
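// reserveContainerName reserves name for the container with the given id,
// returning an error if the name is already reserved for another container.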
func (s *Server) reserveContainerName(id, name string) (string, error) {
if err := s.ctrNameIndex.Reserve(name, id); err != nil {
if err == registrar.ErrNameReserved {
id, err := s.ctrNameIndex.Get(name)
if err != nil {
logrus.Warnf("conflict, ctr name %q already reserved", name)
return "", err
}
return "", fmt.Errorf("conflict, name %q already reserved for ctr %q", name, id)
}
return "", fmt.Errorf("error reserving ctr name %s", name)
}
return name, nil
}
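// releaseContainerName releases a previously reserved container name.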
func (s *Server) releaseContainerName(name string) {
s.ctrNameIndex.Release(name)
}
const (
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
)
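// seccompEnabled reports whether the running kernel supports seccomp
// filtering (CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER).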
func seccompEnabled() bool {
var enabled bool
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
enabled = true
}
}
return enabled
}
// New creates a new Server from the provided configuration.
func New(config *Config) (*Server, error) {
	// TODO: This will go away later when we have a wrapper process or systemd
	// acting as the subreaper.
if err := utils.SetSubreaper(1); err != nil {
return nil, fmt.Errorf("failed to set server as subreaper: %v", err)
}
utils.StartReaper()
if err := os.MkdirAll(config.ImageDir, 0755); err != nil {
return nil, err
}
if err := os.MkdirAll(config.SandboxDir, 0755); err != nil {
return nil, err
}
r, err := oci.New(config.Runtime, config.ContainerDir, config.Conmon, config.ConmonEnv)
if err != nil {
return nil, err
}
sandboxes := make(map[string]*sandbox)
containers := oci.NewMemoryStore()
netPlugin, err := ocicni.InitCNI("")
if err != nil {
return nil, err
}
s := &Server{
runtime: r,
netPlugin: netPlugin,
config: *config,
state: &serverState{
sandboxes: sandboxes,
containers: containers,
},
seccompEnabled: seccompEnabled(),
appArmorEnabled: apparmor.IsEnabled(),
}
seccompProfile, err := ioutil.ReadFile(config.SeccompProfile)
if err != nil {
return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err)
}
var seccompConfig seccomp.Seccomp
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
return nil, fmt.Errorf("decoding seccomp profile failed: %v", err)
}
s.seccompProfile = seccompConfig
if s.appArmorEnabled {
apparmor.InstallDefaultAppArmorProfile()
}
s.appArmorProfile = config.ApparmorProfile
s.podIDIndex = truncindex.NewTruncIndex([]string{})
s.podNameIndex = registrar.NewRegistrar()
s.ctrIDIndex = truncindex.NewTruncIndex([]string{})
s.ctrNameIndex = registrar.NewRegistrar()
s.restore()
logrus.Debugf("sandboxes: %v", s.state.sandboxes)
logrus.Debugf("containers: %v", s.state.containers)
return s, nil
}
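// serverState holds the in-memory view of all known sandboxes and containers.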
type serverState struct {
sandboxes map[string]*sandbox
containers oci.Store
}
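// addSandbox registers sb with the server state under its ID.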
func (s *Server) addSandbox(sb *sandbox) {
s.stateLock.Lock()
s.state.sandboxes[sb.id] = sb
s.stateLock.Unlock()
}
func (s *Server) getSandbox(id string) *sandbox {
s.stateLock.Lock()
sb := s.state.sandboxes[id]
s.stateLock.Unlock()
return sb
}
func (s *Server) hasSandbox(id string) bool {
s.stateLock.Lock()
_, ok := s.state.sandboxes[id]
s.stateLock.Unlock()
return ok
}
func (s *Server) removeSandbox(id string) {
s.stateLock.Lock()
delete(s.state.sandboxes, id)
s.stateLock.Unlock()
}
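// addContainer registers c with both its parent sandbox and the server-wide
// container store.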
func (s *Server) addContainer(c *oci.Container) {
	s.stateLock.Lock()
	defer s.stateLock.Unlock()
	sandbox, ok := s.state.sandboxes[c.Sandbox()]
	if !ok {
		logrus.Warnf("could not find sandbox %s for container %s, not adding it", c.Sandbox(), c.ID())
		return
	}
	sandbox.addContainer(c)
	s.state.containers.Add(c.ID(), c)
}
func (s *Server) getContainer(id string) *oci.Container {
s.stateLock.Lock()
c := s.state.containers.Get(id)
s.stateLock.Unlock()
return c
}
func (s *Server) removeContainer(c *oci.Container) {
	s.stateLock.Lock()
	defer s.stateLock.Unlock()
	sandbox, ok := s.state.sandboxes[c.Sandbox()]
	if !ok {
		logrus.Warnf("could not find sandbox %s for container %s, not removing it", c.Sandbox(), c.ID())
		return
	}
	sandbox.removeContainer(c)
	s.state.containers.Delete(c.ID())
}