sandbox: Use persistent networking namespace

Because they need to prepare the hypervisor networking interfaces
and have them match the ones created in the pod networking
namespace (typically to bridge TAP and veth interfaces), hypervisor
based container runtimes need the sandbox pod networking namespace
to be set up before it's created. They can then prepare and start
the hypervisor interfaces when creating the pod virtual machine.

In order to do so, we need to create per pod persitent networking
namespaces that we pass to the CNI plugin. This patch leverages
the CNI ns package to create such namespaces under /var/run/netns,
and assign them to all pod containers.
The persitent namespace is removed when either the pod is stopped
or removed.

Since the StopPodSandbox() API can be called multiple times from
kubelet, we track the pod networking namespace state (closed or
not) so that we don't get a containernetworking/ns package error
when calling its Close() routine multiple times as well.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
Samuel Ortiz 2016-11-23 18:16:21 +01:00
parent be3ed3bcbc
commit 4cab8ed06a
No known key found for this signature in database
GPG key ID: 8A803CDD4F566C4A
7 changed files with 193 additions and 24 deletions

View file

@ -18,6 +18,7 @@ import (
"github.com/Sirupsen/logrus"
"github.com/kubernetes-incubator/cri-o/utils"
"github.com/containernetworking/cni/pkg/ns"
"golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/fields"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
@ -344,6 +345,7 @@ type Container struct {
annotations fields.Set
image *pb.ImageSpec
sandbox string
netns ns.NetNS
terminal bool
state *ContainerState
metadata *pb.ContainerMetadata
@ -360,7 +362,7 @@ type ContainerState struct {
}
// NewContainer creates a container object.
func NewContainer(id string, name string, bundlePath string, logPath string, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
c := &Container{
id: id,
name: name,
@ -368,6 +370,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, lab
logPath: logPath,
labels: labels,
sandbox: sandbox,
netns: netns,
terminal: terminal,
metadata: metadata,
annotations: annotations,
@ -421,7 +424,12 @@ func (c *Container) NetNsPath() (string, error) {
if c.state == nil {
return "", fmt.Errorf("container state is not populated")
}
if c.netns == nil {
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
}
return c.netns.Path(), nil
}
// Metadata returns the metadata of the container.

View file

@ -273,14 +273,20 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
logrus.Debugf("pod container state %+v", podInfraState)
for nsType, nsFile := range map[string]string{
"ipc": "ipc",
"network": "net",
} {
nsPath := fmt.Sprintf("/proc/%d/ns/%s", podInfraState.Pid, nsFile)
if err := specgen.AddOrReplaceLinuxNamespace(nsType, nsPath); err != nil {
ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil {
return nil, err
}
netNsPath := sb.netNsPath()
if netNsPath == "" {
// The sandbox does not have a permanent namespace,
// it's on the host one.
netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
}
if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil {
return nil, err
}
imageSpec := containerConfig.GetImage()
@ -336,7 +342,7 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
return nil, err
}
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
if err != nil {
return nil, err
}

View file

@ -3,13 +3,46 @@ package server
import (
"errors"
"fmt"
"sync"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/containernetworking/cni/pkg/ns"
"k8s.io/kubernetes/pkg/fields"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
)
type sandboxNetNs struct {
sync.Mutex
ns ns.NetNS
closed bool
}
func netNsGet(nspath string) (*sandboxNetNs, error) {
if err := ns.IsNSorErr(nspath); err != nil {
return nil, errSandboxClosedNetNS
}
netNS, err := ns.GetNS(nspath)
if err != nil {
return nil, err
}
return &sandboxNetNs{ns: netNS, closed: false,}, nil
}
func hostNetNsPath() (string, error) {
netNS, err := ns.GetCurrentNS()
if err != nil {
return "", err
}
defer netNS.Close()
return netNS.Path(), nil
}
type sandbox struct {
id string
name string
@ -20,6 +53,7 @@ type sandbox struct {
containers oci.Store
processLabel string
mountLabel string
netns *sandboxNetNs
metadata *pb.PodSandboxMetadata
shmPath string
}
@ -31,6 +65,7 @@ const (
var (
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed")
)
func (s *sandbox) addContainer(c *oci.Container) {
@ -45,6 +80,63 @@ func (s *sandbox) removeContainer(c *oci.Container) {
s.containers.Delete(c.Name())
}
func (s *sandbox) netNs() ns.NetNS {
if s.netns == nil {
return nil
}
return s.netns.ns
}
func (s *sandbox) netNsPath() string {
if s.netns == nil {
return ""
}
return s.netns.ns.Path()
}
func (s *sandbox) netNsCreate() error {
if s.netns != nil {
return fmt.Errorf("net NS already created")
}
netNS, err := ns.NewNS()
if err != nil {
return err
}
s.netns = &sandboxNetNs{
ns: netNS,
closed: false,
}
return nil
}
func (s *sandbox) netNsRemove() error {
if s.netns == nil {
logrus.Warn("no networking namespace")
return nil
}
s.netns.Lock()
defer s.netns.Unlock()
if s.netns.closed {
// netNsRemove() can be called multiple
// times without returning an error.
return nil
}
if err := s.netns.ns.Close(); err != nil {
return err
}
s.netns.closed = true
return nil
}
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
var (
err error

View file

@ -78,6 +78,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
if err := os.RemoveAll(podSandboxDir); err != nil {
return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err)
}
if err := sb.netNsRemove(); err != nil {
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
}
s.releaseContainerName(podInfraContainer.Name())
s.removeContainer(podInfraContainer)
sb.infraContainer = nil

View file

@ -18,9 +18,9 @@ import (
)
// RunPodSandbox creates and runs a pod-level sandbox.
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (*pb.RunPodSandboxResponse, error) {
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
logrus.Debugf("RunPodSandboxRequest %+v", req)
var processLabel, mountLabel string
var processLabel, mountLabel, netNsPath string
// process req.Name
name := req.GetConfig().GetMetadata().GetName()
if name == "" {
@ -30,7 +30,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
namespace := req.GetConfig().GetMetadata().GetNamespace()
attempt := req.GetConfig().GetMetadata().GetAttempt()
var err error
id, name, err := s.generatePodIDandName(name, namespace, attempt)
if err != nil {
return nil, err
@ -235,6 +234,34 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
if err != nil {
return nil, err
}
netNsPath, err = hostNetNsPath()
if err != nil {
return nil, err
}
} else {
// Create the sandbox network namespace
if err = sb.netNsCreate(); err != nil {
return nil, err
}
defer func() {
if err == nil {
return
}
if netnsErr := sb.netNsRemove(); netnsErr != nil {
logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
}
} ()
// Pass the created namespace path to the runtime
err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath())
if err != nil {
return nil, err
}
netNsPath = sb.netNsPath()
}
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
@ -267,7 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
}
}
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, labels, annotations, nil, nil, id, false)
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil {
return nil, err
}
@ -284,11 +311,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
// setup the network
podNamespace := ""
netnsPath, err := container.NetNsPath()
if err != nil {
return nil, err
}
if err = s.netPlugin.SetUpPod(netnsPath, podNamespace, id, containerName); err != nil {
if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil {
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
}
@ -300,7 +323,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err
}
resp := &pb.RunPodSandboxResponse{PodSandboxId: &id}
resp = &pb.RunPodSandboxResponse{PodSandboxId: &id}
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
return resp, nil
}

View file

@ -35,6 +35,11 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
podInfraContainer.Name(), sb.id, err)
}
// Close the sandbox networking namespace.
if err := sb.netNsRemove(); err != nil {
return nil, err
}
containers := sb.containers.List()
containers = append(containers, podInfraContainer)

View file

@ -92,7 +92,7 @@ func (s *Server) loadContainer(id string) error {
return err
}
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], labels, annotations, img, &metadata, sb.id, tty)
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
if err != nil {
return err
}
@ -106,6 +106,22 @@ func (s *Server) loadContainer(id string) error {
return nil
}
func configNetNsPath(spec rspec.Spec) (string, error) {
for _, ns := range spec.Linux.Namespaces {
if ns.Type != rspec.NetworkNamespace {
continue
}
if ns.Path == "" {
return "", fmt.Errorf("empty networking namespace")
}
return ns.Path, nil
}
return "", fmt.Errorf("missing networking namespace")
}
func (s *Server) loadSandbox(id string) error {
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
if err != nil {
@ -151,6 +167,22 @@ func (s *Server) loadSandbox(id string) error {
metadata: &metadata,
shmPath: m.Annotations["ocid/shm_path"],
}
// We add a netNS only if we can load a permanent one.
// Otherwise, the sandbox will live in the host namespace.
netNsPath, err := configNetNsPath(m)
if err == nil {
netNS, nsErr := netNsGet(netNsPath)
// If we can't load the networking namespace
// because it's closed, we just set the sb netns
// pointer to nil. Otherwise we return an error.
if nsErr != nil && nsErr != errSandboxClosedNetNS {
return nsErr
}
sb.netns = netNS
}
s.addSandbox(sb)
sandboxPath := filepath.Join(s.config.SandboxDir, id)
@ -163,7 +195,7 @@ func (s *Server) loadSandbox(id string) error {
if err != nil {
return err
}
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, labels, annotations, nil, nil, id, false)
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil {
return err
}