cri-o/server/server.go
Nalin Dahyabhai c0333b102b Integrate containers/storage
Use containers/storage to store images, pod sandboxes, and containers.
A pod sandbox's infrastructure container has the same ID as the pod to
which it belongs, and all containers also keep track of their pod's ID.

The container configuration that we build using the data in a
CreateContainerRequest is stored in the container's ContainerDirectory
and ContainerRunDirectory.

We catch SIGTERM and SIGINT, and when we receive either, we gracefully
exit the grpc loop.  If we also think that there aren't any container
filesystems in use, we attempt to do a clean shutdown of the storage
driver.

The test harness now waits for ocid to exit before attempting to delete
the storage root directory.

Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
2017-01-18 10:23:30 -05:00

553 lines
15 KiB
Go

package server
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/containers/image/types"
sstorage "github.com/containers/storage/storage"
"github.com/docker/docker/pkg/registrar"
"github.com/docker/docker/pkg/truncindex"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/kubernetes-incubator/cri-o/pkg/storage"
"github.com/kubernetes-incubator/cri-o/server/apparmor"
"github.com/kubernetes-incubator/cri-o/server/seccomp"
"github.com/opencontainers/runc/libcontainer/label"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/rajatchopra/ocicni"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
const (
runtimeAPIVersion = "v1alpha1"
)
// Server implements the RuntimeService and ImageService
type Server struct {
config Config
runtime *oci.Runtime
store sstorage.Store
images storage.ImageServer
storage storage.RuntimeServer
stateLock sync.Mutex
state *serverState
netPlugin ocicni.CNIPlugin
podNameIndex *registrar.Registrar
podIDIndex *truncindex.TruncIndex
ctrNameIndex *registrar.Registrar
ctrIDIndex *truncindex.TruncIndex
imageContext *types.SystemContext
seccompEnabled bool
seccompProfile seccomp.Seccomp
appArmorEnabled bool
appArmorProfile string
}
func (s *Server) loadContainer(id string) error {
config, err := s.store.GetFromContainerDirectory(id, "config.json")
if err != nil {
return err
}
var m rspec.Spec
if err = json.Unmarshal(config, &m); err != nil {
return err
}
labels := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil {
return err
}
name := m.Annotations["ocid/name"]
name, err = s.reserveContainerName(id, name)
if err != nil {
return err
}
var metadata pb.ContainerMetadata
if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil {
return err
}
sb := s.getSandbox(m.Annotations["ocid/sandbox_id"])
if sb == nil {
logrus.Warnf("could not get sandbox with id %s, skipping", m.Annotations["ocid/sandbox_id"])
return nil
}
var tty bool
if v := m.Annotations["ocid/tty"]; v == "true" {
tty = true
}
containerPath, err := s.store.GetContainerRunDirectory(id)
if err != nil {
return err
}
var img *pb.ImageSpec
image, ok := m.Annotations["ocid/image"]
if ok {
img = &pb.ImageSpec{
Image: &image,
}
}
annotations := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil {
return err
}
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
if err != nil {
return err
}
s.addContainer(ctr)
if err = s.runtime.UpdateStatus(ctr); err != nil {
logrus.Warnf("error updating status for container %s: %v", ctr.ID(), err)
}
if err = s.ctrIDIndex.Add(id); err != nil {
return err
}
return nil
}
func configNetNsPath(spec rspec.Spec) (string, error) {
for _, ns := range spec.Linux.Namespaces {
if ns.Type != rspec.NetworkNamespace {
continue
}
if ns.Path == "" {
return "", fmt.Errorf("empty networking namespace")
}
return ns.Path, nil
}
return "", fmt.Errorf("missing networking namespace")
}
func (s *Server) loadSandbox(id string) error {
config, err := s.store.GetFromContainerDirectory(id, "config.json")
if err != nil {
return err
}
var m rspec.Spec
if err = json.Unmarshal(config, &m); err != nil {
return err
}
labels := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/labels"]), &labels); err != nil {
return err
}
name := m.Annotations["ocid/name"]
name, err = s.reservePodName(id, name)
if err != nil {
return err
}
var metadata pb.PodSandboxMetadata
if err = json.Unmarshal([]byte(m.Annotations["ocid/metadata"]), &metadata); err != nil {
return err
}
processLabel, mountLabel, err := label.InitLabels(label.DupSecOpt(m.Process.SelinuxLabel))
if err != nil {
return err
}
annotations := make(map[string]string)
if err = json.Unmarshal([]byte(m.Annotations["ocid/annotations"]), &annotations); err != nil {
return err
}
sb := &sandbox{
id: id,
name: name,
logDir: m.Annotations["ocid/log_path"],
labels: labels,
containers: oci.NewMemoryStore(),
processLabel: processLabel,
mountLabel: mountLabel,
annotations: annotations,
metadata: &metadata,
shmPath: m.Annotations["ocid/shm_path"],
}
// We add a netNS only if we can load a permanent one.
// Otherwise, the sandbox will live in the host namespace.
netNsPath, err := configNetNsPath(m)
if err == nil {
netNS, nsErr := netNsGet(netNsPath, sb.name)
// If we can't load the networking namespace
// because it's closed, we just set the sb netns
// pointer to nil. Otherwise we return an error.
if nsErr != nil && nsErr != errSandboxClosedNetNS {
return nsErr
}
sb.netns = netNS
}
s.addSandbox(sb)
sandboxPath, err := s.store.GetContainerRunDirectory(id)
if err != nil {
return err
}
if err = label.ReserveLabel(processLabel); err != nil {
return err
}
cname, err := s.reserveContainerName(m.Annotations["ocid/container_id"], m.Annotations["ocid/container_name"])
if err != nil {
return err
}
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil {
return err
}
sb.infraContainer = scontainer
if err = s.runtime.UpdateStatus(scontainer); err != nil {
logrus.Warnf("error updating status for pod sandbox infra container %s: %v", scontainer.ID(), err)
}
if err = s.ctrIDIndex.Add(scontainer.ID()); err != nil {
return err
}
if err = s.podIDIndex.Add(id); err != nil {
return err
}
return nil
}
func (s *Server) restore() {
containers, err := s.store.Containers()
if err != nil && !os.IsNotExist(err) {
logrus.Warnf("could not read containers and sandboxes: %v", err)
}
pods := map[string]*storage.RuntimeContainerMetadata{}
podContainers := map[string]*storage.RuntimeContainerMetadata{}
for _, container := range containers {
metadata, err2 := s.storage.GetContainerMetadata(container.ID)
if err2 != nil {
logrus.Warnf("error parsing metadata for %s: %v, ignoring", container.ID, err2)
continue
}
if metadata.Pod {
pods[container.ID] = &metadata
} else {
podContainers[container.ID] = &metadata
}
}
for containerID, metadata := range pods {
if err = s.loadSandbox(containerID); err != nil {
logrus.Warnf("could not restore sandbox %s container %s: %v", metadata.PodID, containerID, err)
}
}
for containerID := range podContainers {
if err := s.loadContainer(containerID); err != nil {
logrus.Warnf("could not restore container %s: %v", containerID, err)
}
}
}
// Update makes changes to the server's state (lists of pods and containers) to
// reflect the list of pods and containers that are stored on disk, possibly
// having been modified by other parties
func (s *Server) Update() {
logrus.Debugf("updating sandbox and container information")
if err := s.update(); err != nil {
logrus.Errorf("error updating sandbox and container information: %v", err)
}
}
func (s *Server) update() error {
containers, err := s.store.Containers()
if err != nil && !os.IsNotExist(err) {
logrus.Warnf("could not read containers and sandboxes: %v", err)
return err
}
newPods := map[string]*storage.RuntimeContainerMetadata{}
oldPods := map[string]string{}
removedPods := map[string]string{}
newPodContainers := map[string]*storage.RuntimeContainerMetadata{}
oldPodContainers := map[string]string{}
removedPodContainers := map[string]string{}
for _, container := range containers {
if s.hasSandbox(container.ID) {
// FIXME: do we need to reload/update any info about the sandbox?
oldPods[container.ID] = container.ID
oldPodContainers[container.ID] = container.ID
continue
}
if s.getContainer(container.ID) != nil {
// FIXME: do we need to reload/update any info about the container?
oldPodContainers[container.ID] = container.ID
continue
}
// not previously known, so figure out what it is
metadata, err2 := s.storage.GetContainerMetadata(container.ID)
if err2 != nil {
logrus.Errorf("error parsing metadata for %s: %v, ignoring", container.ID, err2)
continue
}
if metadata.Pod {
newPods[container.ID] = &metadata
} else {
newPodContainers[container.ID] = &metadata
}
}
s.ctrIDIndex.Iterate(func(id string) {
if _, ok := oldPodContainers[id]; !ok {
// this container's ID wasn't in the updated list -> removed
removedPodContainers[id] = id
}
})
for removedPodContainer := range removedPodContainers {
// forget this container
c := s.getContainer(removedPodContainer)
s.releaseContainerName(c.Name())
s.removeContainer(c)
if err = s.ctrIDIndex.Delete(c.ID()); err != nil {
return err
}
logrus.Debugf("forgetting removed pod container %s", c.ID())
}
s.podIDIndex.Iterate(func(id string) {
if _, ok := oldPods[id]; !ok {
// this pod's ID wasn't in the updated list -> removed
removedPods[id] = id
}
})
for removedPod := range removedPods {
// forget this pod
sb := s.getSandbox(removedPod)
podInfraContainer := sb.infraContainer
s.releaseContainerName(podInfraContainer.Name())
s.removeContainer(podInfraContainer)
if err = s.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil {
return err
}
sb.infraContainer = nil
s.releasePodName(sb.name)
s.removeSandbox(sb.id)
if err = s.podIDIndex.Delete(sb.id); err != nil {
return err
}
logrus.Debugf("forgetting removed pod %s", sb.id)
}
for sandboxID := range newPods {
// load this pod
if err = s.loadSandbox(sandboxID); err != nil {
logrus.Warnf("could not load new pod sandbox %s: %v, ignoring", sandboxID, err)
} else {
logrus.Debugf("loaded new pod sandbox %s", sandboxID, err)
}
}
for containerID := range newPodContainers {
// load this container
if err = s.loadContainer(containerID); err != nil {
logrus.Warnf("could not load new sandbox container %s: %v, ignoring", containerID, err)
} else {
logrus.Debugf("loaded new pod container %s", containerID, err)
}
}
return nil
}
func (s *Server) reservePodName(id, name string) (string, error) {
if err := s.podNameIndex.Reserve(name, id); err != nil {
if err == registrar.ErrNameReserved {
id, err := s.podNameIndex.Get(name)
if err != nil {
logrus.Warnf("conflict, pod name %q already reserved", name)
return "", err
}
return "", fmt.Errorf("conflict, name %q already reserved for pod %q", name, id)
}
return "", fmt.Errorf("error reserving pod name %q", name)
}
return name, nil
}
func (s *Server) releasePodName(name string) {
s.podNameIndex.Release(name)
}
func (s *Server) reserveContainerName(id, name string) (string, error) {
if err := s.ctrNameIndex.Reserve(name, id); err != nil {
if err == registrar.ErrNameReserved {
id, err := s.ctrNameIndex.Get(name)
if err != nil {
logrus.Warnf("conflict, ctr name %q already reserved", name)
return "", err
}
return "", fmt.Errorf("conflict, name %q already reserved for ctr %q", name, id)
}
return "", fmt.Errorf("error reserving ctr name %s", name)
}
return name, nil
}
func (s *Server) releaseContainerName(name string) {
s.ctrNameIndex.Release(name)
}
const (
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
)
func seccompEnabled() bool {
var enabled bool
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
enabled = true
}
}
return enabled
}
// Shutdown attempts to shut down the server's storage cleanly
func (s *Server) Shutdown() error {
_, err := s.store.Shutdown(false)
return err
}
// New creates a new Server with options provided
func New(config *Config) (*Server, error) {
store, err := sstorage.GetStore(sstorage.StoreOptions{
RunRoot: config.RunRoot,
GraphRoot: config.Root,
GraphDriverName: config.Storage,
GraphDriverOptions: config.StorageOptions,
})
if err != nil {
return nil, err
}
imageService, err := storage.GetImageService(store, config.DefaultTransport)
if err != nil {
return nil, err
}
storageRuntimeService := storage.GetRuntimeService(imageService)
if err != nil {
return nil, err
}
r, err := oci.New(config.Runtime, config.Conmon, config.ConmonEnv, config.CgroupManager)
if err != nil {
return nil, err
}
sandboxes := make(map[string]*sandbox)
containers := oci.NewMemoryStore()
netPlugin, err := ocicni.InitCNI(config.NetworkDir)
if err != nil {
return nil, err
}
s := &Server{
runtime: r,
store: store,
images: imageService,
storage: storageRuntimeService,
netPlugin: netPlugin,
config: *config,
state: &serverState{
sandboxes: sandboxes,
containers: containers,
},
seccompEnabled: seccompEnabled(),
appArmorEnabled: apparmor.IsEnabled(),
appArmorProfile: config.ApparmorProfile,
}
seccompProfile, err := ioutil.ReadFile(config.SeccompProfile)
if err != nil {
return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, err)
}
var seccompConfig seccomp.Seccomp
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
return nil, fmt.Errorf("decoding seccomp profile failed: %v", err)
}
s.seccompProfile = seccompConfig
if s.appArmorEnabled && s.appArmorProfile == apparmor.DefaultApparmorProfile {
if err := apparmor.EnsureDefaultApparmorProfile(); err != nil {
return nil, fmt.Errorf("ensuring the default apparmor profile is installed failed: %v", err)
}
}
s.podIDIndex = truncindex.NewTruncIndex([]string{})
s.podNameIndex = registrar.NewRegistrar()
s.ctrIDIndex = truncindex.NewTruncIndex([]string{})
s.ctrNameIndex = registrar.NewRegistrar()
s.imageContext = &types.SystemContext{
SignaturePolicyPath: config.ImageConfig.SignaturePolicyPath,
}
s.restore()
logrus.Debugf("sandboxes: %v", s.state.sandboxes)
logrus.Debugf("containers: %v", s.state.containers)
return s, nil
}
type serverState struct {
sandboxes map[string]*sandbox
containers oci.Store
}
func (s *Server) addSandbox(sb *sandbox) {
s.stateLock.Lock()
s.state.sandboxes[sb.id] = sb
s.stateLock.Unlock()
}
func (s *Server) getSandbox(id string) *sandbox {
s.stateLock.Lock()
sb := s.state.sandboxes[id]
s.stateLock.Unlock()
return sb
}
func (s *Server) hasSandbox(id string) bool {
s.stateLock.Lock()
_, ok := s.state.sandboxes[id]
s.stateLock.Unlock()
return ok
}
func (s *Server) removeSandbox(id string) {
s.stateLock.Lock()
delete(s.state.sandboxes, id)
s.stateLock.Unlock()
}
func (s *Server) addContainer(c *oci.Container) {
s.stateLock.Lock()
sandbox := s.state.sandboxes[c.Sandbox()]
// TODO(runcom): handle !ok above!!! otherwise it panics!
sandbox.addContainer(c)
s.state.containers.Add(c.ID(), c)
s.stateLock.Unlock()
}
func (s *Server) getContainer(id string) *oci.Container {
s.stateLock.Lock()
c := s.state.containers.Get(id)
s.stateLock.Unlock()
return c
}
func (s *Server) removeContainer(c *oci.Container) {
s.stateLock.Lock()
sandbox := s.state.sandboxes[c.Sandbox()]
sandbox.removeContainer(c)
s.state.containers.Delete(c.ID())
s.stateLock.Unlock()
}