sandbox: Use persistent networking namespace

Because they need to prepare the hypervisor networking interfaces
and have them match the ones created in the pod networking
namespace (typically to bridge TAP and veth interfaces), hypervisor
based container runtimes need the sandbox pod networking namespace
to be set up before it's created. They can then prepare and start
the hypervisor interfaces when creating the pod virtual machine.

In order to do so, we need to create per-pod persistent networking
namespaces that we pass to the CNI plugin. This patch leverages
the CNI ns package to create such namespaces under /var/run/netns,
and assign them to all pod containers.
The persistent namespace is removed when either the pod is stopped
or removed.

Since the StopPodSandbox() API can be called multiple times from
kubelet, we track the pod networking namespace state (closed or
not) so that we don't get a containernetworking/ns package error
when calling its Close() routine multiple times as well.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
Samuel Ortiz 2016-11-23 18:16:21 +01:00
parent be3ed3bcbc
commit 4cab8ed06a
No known key found for this signature in database
GPG key ID: 8A803CDD4F566C4A
7 changed files with 193 additions and 24 deletions

View file

@ -18,6 +18,7 @@ import (
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/kubernetes-incubator/cri-o/utils" "github.com/kubernetes-incubator/cri-o/utils"
"github.com/containernetworking/cni/pkg/ns"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/fields"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
@ -344,6 +345,7 @@ type Container struct {
annotations fields.Set annotations fields.Set
image *pb.ImageSpec image *pb.ImageSpec
sandbox string sandbox string
netns ns.NetNS
terminal bool terminal bool
state *ContainerState state *ContainerState
metadata *pb.ContainerMetadata metadata *pb.ContainerMetadata
@ -360,7 +362,7 @@ type ContainerState struct {
} }
// NewContainer creates a container object. // NewContainer creates a container object.
func NewContainer(id string, name string, bundlePath string, logPath string, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) { func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
c := &Container{ c := &Container{
id: id, id: id,
name: name, name: name,
@ -368,6 +370,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, lab
logPath: logPath, logPath: logPath,
labels: labels, labels: labels,
sandbox: sandbox, sandbox: sandbox,
netns: netns,
terminal: terminal, terminal: terminal,
metadata: metadata, metadata: metadata,
annotations: annotations, annotations: annotations,
@ -421,7 +424,12 @@ func (c *Container) NetNsPath() (string, error) {
if c.state == nil { if c.state == nil {
return "", fmt.Errorf("container state is not populated") return "", fmt.Errorf("container state is not populated")
} }
if c.netns == nil {
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
}
return c.netns.Path(), nil
} }
// Metadata returns the metadata of the container. // Metadata returns the metadata of the container.

View file

@ -273,14 +273,20 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
logrus.Debugf("pod container state %+v", podInfraState) logrus.Debugf("pod container state %+v", podInfraState)
for nsType, nsFile := range map[string]string{ ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
"ipc": "ipc", if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil {
"network": "net",
} {
nsPath := fmt.Sprintf("/proc/%d/ns/%s", podInfraState.Pid, nsFile)
if err := specgen.AddOrReplaceLinuxNamespace(nsType, nsPath); err != nil {
return nil, err return nil, err
} }
netNsPath := sb.netNsPath()
if netNsPath == "" {
// The sandbox does not have a permanent namespace,
// it's on the host one.
netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
}
if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil {
return nil, err
} }
imageSpec := containerConfig.GetImage() imageSpec := containerConfig.GetImage()
@ -336,7 +342,7 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
return nil, err return nil, err
} }
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty()) container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -3,13 +3,46 @@ package server
import ( import (
"errors" "errors"
"fmt" "fmt"
"sync"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid" "github.com/docker/docker/pkg/stringid"
"github.com/kubernetes-incubator/cri-o/oci" "github.com/kubernetes-incubator/cri-o/oci"
"github.com/containernetworking/cni/pkg/ns"
"k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/fields"
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
) )
// sandboxNetNs pairs a sandbox networking namespace handle with a flag
// recording whether that handle has already been closed.
type sandboxNetNs struct {
// The embedded mutex is held by netNsRemove while it checks and
// updates the closed flag.
sync.Mutex
// ns is the networking namespace handle.
ns ns.NetNS
// closed is set to true once ns.Close() has succeeded, so that
// later netNsRemove calls return without closing it again.
closed bool
}
// netNsGet wraps the networking namespace found at nspath in a
// sandboxNetNs that is marked as not closed.
//
// Any failure of the ns.IsNSorErr check on nspath is reported as
// errSandboxClosedNetNS; a failure of ns.GetNS is returned unchanged.
func netNsGet(nspath string) (*sandboxNetNs, error) {
	if checkErr := ns.IsNSorErr(nspath); checkErr != nil {
		return nil, errSandboxClosedNetNS
	}

	handle, err := ns.GetNS(nspath)
	if err != nil {
		return nil, err
	}

	wrapped := &sandboxNetNs{
		ns:     handle,
		closed: false,
	}
	return wrapped, nil
}
// hostNetNsPath returns the path of the networking namespace obtained
// from ns.GetCurrentNS. The namespace handle is closed on return; the
// error from that Close call is not inspected.
func hostNetNsPath() (string, error) {
	current, err := ns.GetCurrentNS()
	if err != nil {
		return "", err
	}
	defer current.Close()

	path := current.Path()
	return path, nil
}
type sandbox struct { type sandbox struct {
id string id string
name string name string
@ -20,6 +53,7 @@ type sandbox struct {
containers oci.Store containers oci.Store
processLabel string processLabel string
mountLabel string mountLabel string
netns *sandboxNetNs
metadata *pb.PodSandboxMetadata metadata *pb.PodSandboxMetadata
shmPath string shmPath string
} }
@ -31,6 +65,7 @@ const (
var ( var (
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty") errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed")
) )
func (s *sandbox) addContainer(c *oci.Container) { func (s *sandbox) addContainer(c *oci.Container) {
@ -45,6 +80,63 @@ func (s *sandbox) removeContainer(c *oci.Container) {
s.containers.Delete(c.Name()) s.containers.Delete(c.Name())
} }
// netNs returns the sandbox networking namespace handle, or nil when
// the sandbox has no networking namespace attached.
func (s *sandbox) netNs() ns.NetNS {
	if s.netns != nil {
		return s.netns.ns
	}

	return nil
}
// netNsPath returns the path of the sandbox networking namespace, or
// the empty string when the sandbox has no networking namespace
// attached.
func (s *sandbox) netNsPath() string {
	if s.netns != nil {
		return s.netns.ns.Path()
	}

	return ""
}
// netNsCreate creates a new networking namespace through ns.NewNS and
// attaches it to the sandbox, marked as not closed.
//
// It returns an error if the sandbox already has a networking namespace
// attached, or if ns.NewNS fails; in both cases the sandbox is left
// untouched.
func (s *sandbox) netNsCreate() error {
	if s.netns != nil {
		return fmt.Errorf("net NS already created")
	}

	created, err := ns.NewNS()
	if err != nil {
		return err
	}

	s.netns = &sandboxNetNs{ns: created, closed: false}
	return nil
}
// netNsRemove closes the sandbox networking namespace.
//
// It only logs a warning and returns nil when the sandbox has no
// networking namespace attached. It may be called several times: once
// the namespace has been closed successfully, later calls return nil
// without closing it again. If Close fails, its error is returned and
// the namespace stays marked as not closed.
func (s *sandbox) netNsRemove() error {
	sbNs := s.netns
	if sbNs == nil {
		logrus.Warn("no networking namespace")
		return nil
	}

	// The lock covers both the closed check and the Close() call.
	sbNs.Lock()
	defer sbNs.Unlock()

	if !sbNs.closed {
		if err := sbNs.ns.Close(); err != nil {
			return err
		}
		sbNs.closed = true
	}

	return nil
}
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) { func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
var ( var (
err error err error

View file

@ -78,6 +78,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
if err := os.RemoveAll(podSandboxDir); err != nil { if err := os.RemoveAll(podSandboxDir); err != nil {
return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err) return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err)
} }
if err := sb.netNsRemove(); err != nil {
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
}
s.releaseContainerName(podInfraContainer.Name()) s.releaseContainerName(podInfraContainer.Name())
s.removeContainer(podInfraContainer) s.removeContainer(podInfraContainer)
sb.infraContainer = nil sb.infraContainer = nil

View file

@ -18,9 +18,9 @@ import (
) )
// RunPodSandbox creates and runs a pod-level sandbox. // RunPodSandbox creates and runs a pod-level sandbox.
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (*pb.RunPodSandboxResponse, error) { func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
logrus.Debugf("RunPodSandboxRequest %+v", req) logrus.Debugf("RunPodSandboxRequest %+v", req)
var processLabel, mountLabel string var processLabel, mountLabel, netNsPath string
// process req.Name // process req.Name
name := req.GetConfig().GetMetadata().GetName() name := req.GetConfig().GetMetadata().GetName()
if name == "" { if name == "" {
@ -30,7 +30,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
namespace := req.GetConfig().GetMetadata().GetNamespace() namespace := req.GetConfig().GetMetadata().GetNamespace()
attempt := req.GetConfig().GetMetadata().GetAttempt() attempt := req.GetConfig().GetMetadata().GetAttempt()
var err error
id, name, err := s.generatePodIDandName(name, namespace, attempt) id, name, err := s.generatePodIDandName(name, namespace, attempt)
if err != nil { if err != nil {
return nil, err return nil, err
@ -235,6 +234,34 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
if err != nil { if err != nil {
return nil, err return nil, err
} }
netNsPath, err = hostNetNsPath()
if err != nil {
return nil, err
}
} else {
// Create the sandbox network namespace
if err = sb.netNsCreate(); err != nil {
return nil, err
}
defer func() {
if err == nil {
return
}
if netnsErr := sb.netNsRemove(); netnsErr != nil {
logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
}
} ()
// Pass the created namespace path to the runtime
err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath())
if err != nil {
return nil, err
}
netNsPath = sb.netNsPath()
} }
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() { if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
@ -267,7 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
} }
} }
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, labels, annotations, nil, nil, id, false) container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -284,11 +311,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
// setup the network // setup the network
podNamespace := "" podNamespace := ""
netnsPath, err := container.NetNsPath() if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil {
if err != nil {
return nil, err
}
if err = s.netPlugin.SetUpPod(netnsPath, podNamespace, id, containerName); err != nil {
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err) return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
} }
@ -300,7 +323,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
return nil, err return nil, err
} }
resp := &pb.RunPodSandboxResponse{PodSandboxId: &id} resp = &pb.RunPodSandboxResponse{PodSandboxId: &id}
logrus.Debugf("RunPodSandboxResponse: %+v", resp) logrus.Debugf("RunPodSandboxResponse: %+v", resp)
return resp, nil return resp, nil
} }

View file

@ -35,6 +35,11 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
podInfraContainer.Name(), sb.id, err) podInfraContainer.Name(), sb.id, err)
} }
// Close the sandbox networking namespace.
if err := sb.netNsRemove(); err != nil {
return nil, err
}
containers := sb.containers.List() containers := sb.containers.List()
containers = append(containers, podInfraContainer) containers = append(containers, podInfraContainer)

View file

@ -92,7 +92,7 @@ func (s *Server) loadContainer(id string) error {
return err return err
} }
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], labels, annotations, img, &metadata, sb.id, tty) ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
if err != nil { if err != nil {
return err return err
} }
@ -106,6 +106,22 @@ func (s *Server) loadContainer(id string) error {
return nil return nil
} }
// configNetNsPath returns the path of the first network namespace entry
// listed in spec.Linux.Namespaces.
//
// It returns an error when that entry has an empty path, or when the
// spec lists no network namespace entry at all.
func configNetNsPath(spec rspec.Spec) (string, error) {
	for _, entry := range spec.Linux.Namespaces {
		if entry.Type == rspec.NetworkNamespace {
			if entry.Path != "" {
				return entry.Path, nil
			}
			return "", fmt.Errorf("empty networking namespace")
		}
	}

	return "", fmt.Errorf("missing networking namespace")
}
func (s *Server) loadSandbox(id string) error { func (s *Server) loadSandbox(id string) error {
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json")) config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
if err != nil { if err != nil {
@ -151,6 +167,22 @@ func (s *Server) loadSandbox(id string) error {
metadata: &metadata, metadata: &metadata,
shmPath: m.Annotations["ocid/shm_path"], shmPath: m.Annotations["ocid/shm_path"],
} }
// We add a netNS only if we can load a permanent one.
// Otherwise, the sandbox will live in the host namespace.
netNsPath, err := configNetNsPath(m)
if err == nil {
netNS, nsErr := netNsGet(netNsPath)
// If we can't load the networking namespace
// because it's closed, we just set the sb netns
// pointer to nil. Otherwise we return an error.
if nsErr != nil && nsErr != errSandboxClosedNetNS {
return nsErr
}
sb.netns = netNS
}
s.addSandbox(sb) s.addSandbox(sb)
sandboxPath := filepath.Join(s.config.SandboxDir, id) sandboxPath := filepath.Join(s.config.SandboxDir, id)
@ -163,7 +195,7 @@ func (s *Server) loadSandbox(id string) error {
if err != nil { if err != nil {
return err return err
} }
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, labels, annotations, nil, nil, id, false) scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
if err != nil { if err != nil {
return err return err
} }