From 4cab8ed06ac834e1c5181fe179b66b5767075bb9 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 23 Nov 2016 18:16:21 +0100 Subject: [PATCH] sandbox: Use persistent networking namespace Because they need to prepare the hypervisor networking interfaces and have them match the ones created in the pod networking namespace (typically to bridge TAP and veth interfaces), hypervisor based container runtimes need the sandbox pod networking namespace to be set up before it's created. They can then prepare and start the hypervisor interfaces when creating the pod virtual machine. In order to do so, we need to create per pod persitent networking namespaces that we pass to the CNI plugin. This patch leverages the CNI ns package to create such namespaces under /var/run/netns, and assign them to all pod containers. The persitent namespace is removed when either the pod is stopped or removed. Since the StopPodSandbox() API can be called multiple times from kubelet, we track the pod networking namespace state (closed or not) so that we don't get a containernetworking/ns package error when calling its Close() routine multiple times as well. Signed-off-by: Samuel Ortiz --- oci/oci.go | 12 ++++- server/container_create.go | 24 ++++++---- server/sandbox.go | 94 +++++++++++++++++++++++++++++++++++++- server/sandbox_remove.go | 3 ++ server/sandbox_run.go | 43 +++++++++++++---- server/sandbox_stop.go | 5 ++ server/server.go | 36 ++++++++++++++- 7 files changed, 193 insertions(+), 24 deletions(-) diff --git a/oci/oci.go b/oci/oci.go index 287bc73e..033c1fe3 100644 --- a/oci/oci.go +++ b/oci/oci.go @@ -18,6 +18,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/kubernetes-incubator/cri-o/utils" + "github.com/containernetworking/cni/pkg/ns" "golang.org/x/sys/unix" "k8s.io/kubernetes/pkg/fields" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" @@ -344,6 +345,7 @@ type Container struct { annotations fields.Set image *pb.ImageSpec sandbox string + netns ns.NetNS terminal bool state *ContainerState metadata *pb.ContainerMetadata @@ -360,7 +362,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id string, name string, bundlePath string, logPath string, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) { +func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) { c := &Container{ id: id, name: name, @@ -368,6 +370,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, lab logPath: logPath, labels: labels, sandbox: sandbox, + netns: netns, terminal: terminal, metadata: metadata, annotations: annotations, @@ -421,7 +424,12 @@ func (c *Container) NetNsPath() (string, error) { if c.state == nil { return "", fmt.Errorf("container state is not populated") } - return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil + + if c.netns == nil { + return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil + } + + return c.netns.Path(), nil } // Metadata returns the metadata of the container. diff --git a/server/container_create.go b/server/container_create.go index 5445a3ab..20674e56 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -273,14 +273,20 @@ func (s *Server) createSandboxContainer(containerID string, containerName string logrus.Debugf("pod container state %+v", podInfraState) - for nsType, nsFile := range map[string]string{ - "ipc": "ipc", - "network": "net", - } { - nsPath := fmt.Sprintf("/proc/%d/ns/%s", podInfraState.Pid, nsFile) - if err := specgen.AddOrReplaceLinuxNamespace(nsType, nsPath); err != nil { - return nil, err - } + ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid) + if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil { + return nil, err + } + + netNsPath := sb.netNsPath() + if netNsPath == "" { + // The sandbox does not have a permanent namespace, + // it's on the host one. + netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid) + } + + if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil { + return nil, err } imageSpec := containerConfig.GetImage() @@ -336,7 +342,7 @@ func (s *Server) createSandboxContainer(containerID string, containerName string return nil, err } - container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty()) + container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty()) if err != nil { return nil, err } diff --git a/server/sandbox.go b/server/sandbox.go index 34174575..6d44bb7e 100644 --- a/server/sandbox.go +++ b/server/sandbox.go @@ -3,13 +3,46 @@ package server import ( "errors" "fmt" + "sync" + "github.com/Sirupsen/logrus" "github.com/docker/docker/pkg/stringid" "github.com/kubernetes-incubator/cri-o/oci" + "github.com/containernetworking/cni/pkg/ns" "k8s.io/kubernetes/pkg/fields" pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) +type sandboxNetNs struct { + sync.Mutex + ns ns.NetNS + closed bool +} + +func netNsGet(nspath string) (*sandboxNetNs, error) { + if err := ns.IsNSorErr(nspath); err != nil { + return nil, errSandboxClosedNetNS + } + + netNS, err := ns.GetNS(nspath) + if err != nil { + return nil, err + } + + return &sandboxNetNs{ns: netNS, closed: false,}, nil +} + +func hostNetNsPath() (string, error) { + netNS, err := ns.GetCurrentNS() + if err != nil { + return "", err + } + + defer netNS.Close() + + return netNS.Path(), nil +} + type sandbox struct { id string name string @@ -20,6 +53,7 @@ type sandbox struct { containers oci.Store processLabel string mountLabel string + netns *sandboxNetNs metadata *pb.PodSandboxMetadata shmPath string } @@ -30,7 +64,8 @@ const ( ) var ( - errSandboxIDEmpty = errors.New("PodSandboxId should not be empty") + errSandboxIDEmpty = errors.New("PodSandboxId should not be empty") + errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed") ) func (s *sandbox) addContainer(c *oci.Container) { @@ -45,6 +80,63 @@ func (s *sandbox) removeContainer(c *oci.Container) { s.containers.Delete(c.Name()) } +func (s *sandbox) netNs() ns.NetNS { + if s.netns == nil { + return nil + } + + return s.netns.ns +} + +func (s *sandbox) netNsPath() string { + if s.netns == nil { + return "" + } + + return s.netns.ns.Path() +} + +func (s *sandbox) netNsCreate() error { + if s.netns != nil { + return fmt.Errorf("net NS already created") + } + + netNS, err := ns.NewNS() + if err != nil { + return err + } + + s.netns = &sandboxNetNs{ + ns: netNS, + closed: false, + } + + return nil +} + +func (s *sandbox) netNsRemove() error { + if s.netns == nil { + logrus.Warn("no networking namespace") + return nil + } + + s.netns.Lock() + defer s.netns.Unlock() + + if s.netns.closed { + // netNsRemove() can be called multiple + // times without returning an error. + return nil + } + + if err := s.netns.ns.Close(); err != nil { + return err + } + + s.netns.closed = true + return nil +} + func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) { var ( err error diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index 00c1d74f..d4bda1d4 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -78,6 +78,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR if err := os.RemoveAll(podSandboxDir); err != nil { return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err) } + if err := sb.netNsRemove(); err != nil { + return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err) + } s.releaseContainerName(podInfraContainer.Name()) s.removeContainer(podInfraContainer) sb.infraContainer = nil diff --git a/server/sandbox_run.go b/server/sandbox_run.go index 5b494a74..b4850df2 100644 --- a/server/sandbox_run.go +++ b/server/sandbox_run.go @@ -18,9 +18,9 @@ import ( ) // RunPodSandbox creates and runs a pod-level sandbox. -func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (*pb.RunPodSandboxResponse, error) { +func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { logrus.Debugf("RunPodSandboxRequest %+v", req) - var processLabel, mountLabel string + var processLabel, mountLabel, netNsPath string // process req.Name name := req.GetConfig().GetMetadata().GetName() if name == "" { @@ -30,7 +30,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest namespace := req.GetConfig().GetMetadata().GetNamespace() attempt := req.GetConfig().GetMetadata().GetAttempt() - var err error id, name, err := s.generatePodIDandName(name, namespace, attempt) if err != nil { return nil, err @@ -235,6 +234,34 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest if err != nil { return nil, err } + + netNsPath, err = hostNetNsPath() + if err != nil { + return nil, err + } + } else { + // Create the sandbox network namespace + if err = sb.netNsCreate(); err != nil { + return nil, err + } + + defer func() { + if err == nil { + return + } + + if netnsErr := sb.netNsRemove(); netnsErr != nil { + logrus.Warnf("Failed to remove networking namespace: %v", netnsErr) + } + } () + + // Pass the created namespace path to the runtime + err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath()) + if err != nil { + return nil, err + } + + netNsPath = sb.netNsPath() } if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() { @@ -267,7 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } } - container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, labels, annotations, nil, nil, id, false) + container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false) if err != nil { return nil, err } @@ -284,11 +311,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest // setup the network podNamespace := "" - netnsPath, err := container.NetNsPath() - if err != nil { - return nil, err - } - if err = s.netPlugin.SetUpPod(netnsPath, podNamespace, id, containerName); err != nil { + if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil { return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err) } @@ -300,7 +323,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest return nil, err } - resp := &pb.RunPodSandboxResponse{PodSandboxId: &id} + resp = &pb.RunPodSandboxResponse{PodSandboxId: &id} logrus.Debugf("RunPodSandboxResponse: %+v", resp) return resp, nil } diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go index 2506ad4e..47f570c2 100644 --- a/server/sandbox_stop.go +++ b/server/sandbox_stop.go @@ -35,6 +35,11 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque podInfraContainer.Name(), sb.id, err) } + // Close the sandbox networking namespace. + if err := sb.netNsRemove(); err != nil { + return nil, err + } + containers := sb.containers.List() containers = append(containers, podInfraContainer) diff --git a/server/server.go b/server/server.go index e5620c87..60f959ad 100644 --- a/server/server.go +++ b/server/server.go @@ -92,7 +92,7 @@ func (s *Server) loadContainer(id string) error { return err } - ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], labels, annotations, img, &metadata, sb.id, tty) + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty) if err != nil { return err } @@ -106,6 +106,22 @@ func (s *Server) loadContainer(id string) error { return nil } +func configNetNsPath(spec rspec.Spec) (string, error) { + for _, ns := range spec.Linux.Namespaces { + if ns.Type != rspec.NetworkNamespace { + continue + } + + if ns.Path == "" { + return "", fmt.Errorf("empty networking namespace") + } + + return ns.Path, nil + } + + return "", fmt.Errorf("missing networking namespace") +} + func (s *Server) loadSandbox(id string) error { config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json")) if err != nil { @@ -151,6 +167,22 @@ func (s *Server) loadSandbox(id string) error { metadata: &metadata, shmPath: m.Annotations["ocid/shm_path"], } + + // We add a netNS only if we can load a permanent one. + // Otherwise, the sandbox will live in the host namespace. + netNsPath, err := configNetNsPath(m) + if err == nil { + netNS, nsErr := netNsGet(netNsPath) + // If we can't load the networking namespace + // because it's closed, we just set the sb netns + // pointer to nil. Otherwise we return an error. + if nsErr != nil && nsErr != errSandboxClosedNetNS { + return nsErr + } + + sb.netns = netNS + } + s.addSandbox(sb) sandboxPath := filepath.Join(s.config.SandboxDir, id) @@ -163,7 +195,7 @@ func (s *Server) loadSandbox(id string) error { if err != nil { return err } - scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, labels, annotations, nil, nil, id, false) + scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false) if err != nil { return err }