sandbox: Use persistent networking namespace
Because they need to prepare the hypervisor networking interfaces and have them match the ones created in the pod networking namespace (typically to bridge TAP and veth interfaces), hypervisor-based container runtimes need the sandbox pod networking namespace to be set up before the pod itself is created. They can then prepare and start the hypervisor interfaces when creating the pod virtual machine. In order to do so, we need to create per-pod persistent networking namespaces that we pass to the CNI plugin. This patch leverages the CNI ns package to create such namespaces under /var/run/netns, and assigns them to all pod containers. The persistent namespace is removed when the pod is either stopped or removed. Since the StopPodSandbox() API can be called multiple times from kubelet, we track the pod networking namespace state (closed or not) so that we don't get a containernetworking/ns package error when calling its Close() routine multiple times as well. Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
This commit is contained in:
parent
be3ed3bcbc
commit
4cab8ed06a
7 changed files with 193 additions and 24 deletions
10
oci/oci.go
10
oci/oci.go
|
@ -18,6 +18,7 @@ import (
|
||||||
|
|
||||||
"github.com/Sirupsen/logrus"
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/kubernetes-incubator/cri-o/utils"
|
"github.com/kubernetes-incubator/cri-o/utils"
|
||||||
|
"github.com/containernetworking/cni/pkg/ns"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||||
|
@ -344,6 +345,7 @@ type Container struct {
|
||||||
annotations fields.Set
|
annotations fields.Set
|
||||||
image *pb.ImageSpec
|
image *pb.ImageSpec
|
||||||
sandbox string
|
sandbox string
|
||||||
|
netns ns.NetNS
|
||||||
terminal bool
|
terminal bool
|
||||||
state *ContainerState
|
state *ContainerState
|
||||||
metadata *pb.ContainerMetadata
|
metadata *pb.ContainerMetadata
|
||||||
|
@ -360,7 +362,7 @@ type ContainerState struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewContainer creates a container object.
|
// NewContainer creates a container object.
|
||||||
func NewContainer(id string, name string, bundlePath string, logPath string, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
|
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
|
||||||
c := &Container{
|
c := &Container{
|
||||||
id: id,
|
id: id,
|
||||||
name: name,
|
name: name,
|
||||||
|
@ -368,6 +370,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, lab
|
||||||
logPath: logPath,
|
logPath: logPath,
|
||||||
labels: labels,
|
labels: labels,
|
||||||
sandbox: sandbox,
|
sandbox: sandbox,
|
||||||
|
netns: netns,
|
||||||
terminal: terminal,
|
terminal: terminal,
|
||||||
metadata: metadata,
|
metadata: metadata,
|
||||||
annotations: annotations,
|
annotations: annotations,
|
||||||
|
@ -421,9 +424,14 @@ func (c *Container) NetNsPath() (string, error) {
|
||||||
if c.state == nil {
|
if c.state == nil {
|
||||||
return "", fmt.Errorf("container state is not populated")
|
return "", fmt.Errorf("container state is not populated")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.netns == nil {
|
||||||
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
|
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return c.netns.Path(), nil
|
||||||
|
}
|
||||||
|
|
||||||
// Metadata returns the metadata of the container.
|
// Metadata returns the metadata of the container.
|
||||||
func (c *Container) Metadata() *pb.ContainerMetadata {
|
func (c *Container) Metadata() *pb.ContainerMetadata {
|
||||||
return c.metadata
|
return c.metadata
|
||||||
|
|
|
@ -273,14 +273,20 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
||||||
|
|
||||||
logrus.Debugf("pod container state %+v", podInfraState)
|
logrus.Debugf("pod container state %+v", podInfraState)
|
||||||
|
|
||||||
for nsType, nsFile := range map[string]string{
|
ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
|
||||||
"ipc": "ipc",
|
if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil {
|
||||||
"network": "net",
|
|
||||||
} {
|
|
||||||
nsPath := fmt.Sprintf("/proc/%d/ns/%s", podInfraState.Pid, nsFile)
|
|
||||||
if err := specgen.AddOrReplaceLinuxNamespace(nsType, nsPath); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
netNsPath := sb.netNsPath()
|
||||||
|
if netNsPath == "" {
|
||||||
|
// The sandbox does not have a permanent namespace,
|
||||||
|
// it's on the host one.
|
||||||
|
netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
imageSpec := containerConfig.GetImage()
|
imageSpec := containerConfig.GetImage()
|
||||||
|
@ -336,7 +342,7 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
|
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,13 +3,46 @@ package server
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/kubernetes-incubator/cri-o/oci"
|
"github.com/kubernetes-incubator/cri-o/oci"
|
||||||
|
"github.com/containernetworking/cni/pkg/ns"
|
||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// sandboxNetNs pairs a sandbox's networking namespace handle with a flag
// recording whether that handle has already been closed. The embedded
// mutex is taken by netNsRemove while it checks and updates closed.
type sandboxNetNs struct {
|
||||||
|
sync.Mutex
|
||||||
|
ns ns.NetNS
|
||||||
|
closed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// netNsGet wraps the networking namespace found at nspath in a
// sandboxNetNs whose closed flag is false.
//
// Any error reported by ns.IsNSorErr for nspath is returned as
// errSandboxClosedNetNS (the underlying error is discarded); an error
// from ns.GetNS is returned unchanged.
func netNsGet(nspath string) (*sandboxNetNs, error) {
|
||||||
|
if err := ns.IsNSorErr(nspath); err != nil {
|
||||||
|
return nil, errSandboxClosedNetNS
|
||||||
|
}
|
||||||
|
|
||||||
|
netNS, err := ns.GetNS(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &sandboxNetNs{ns: netNS, closed: false,}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// hostNetNsPath returns the path of the networking namespace obtained
// from ns.GetCurrentNS(), or the error from that call.
func hostNetNsPath() (string, error) {
|
||||||
|
netNS, err := ns.GetCurrentNS()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only the path string is handed back, so the handle is released on return.
defer netNS.Close()
|
||||||
|
|
||||||
|
return netNS.Path(), nil
|
||||||
|
}
|
||||||
|
|
||||||
type sandbox struct {
|
type sandbox struct {
|
||||||
id string
|
id string
|
||||||
name string
|
name string
|
||||||
|
@ -20,6 +53,7 @@ type sandbox struct {
|
||||||
containers oci.Store
|
containers oci.Store
|
||||||
processLabel string
|
processLabel string
|
||||||
mountLabel string
|
mountLabel string
|
||||||
|
netns *sandboxNetNs
|
||||||
metadata *pb.PodSandboxMetadata
|
metadata *pb.PodSandboxMetadata
|
||||||
shmPath string
|
shmPath string
|
||||||
}
|
}
|
||||||
|
@ -31,6 +65,7 @@ const (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
|
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
|
||||||
|
errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *sandbox) addContainer(c *oci.Container) {
|
func (s *sandbox) addContainer(c *oci.Container) {
|
||||||
|
@ -45,6 +80,63 @@ func (s *sandbox) removeContainer(c *oci.Container) {
|
||||||
s.containers.Delete(c.Name())
|
s.containers.Delete(c.Name())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// netNs returns the sandbox's networking namespace handle, or nil when
// the sandbox has no netns attached.
func (s *sandbox) netNs() ns.NetNS {
|
||||||
|
if s.netns == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.netns.ns
|
||||||
|
}
|
||||||
|
|
||||||
|
// netNsPath returns the path of the sandbox's networking namespace, or
// the empty string when the sandbox has no netns attached.
func (s *sandbox) netNsPath() string {
|
||||||
|
if s.netns == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.netns.ns.Path()
|
||||||
|
}
|
||||||
|
|
||||||
|
// netNsCreate creates a new networking namespace with ns.NewNS() and
// attaches it to the sandbox with its closed flag set to false.
//
// It returns an error if the sandbox already has a netns attached, or
// the error from ns.NewNS() if the namespace cannot be created.
func (s *sandbox) netNsCreate() error {
|
||||||
|
if s.netns != nil {
|
||||||
|
return fmt.Errorf("net NS already created")
|
||||||
|
}
|
||||||
|
|
||||||
|
netNS, err := ns.NewNS()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.netns = &sandboxNetNs{
|
||||||
|
ns: netNS,
|
||||||
|
closed: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// netNsRemove closes the sandbox's networking namespace handle and marks
// it as closed.
//
// A sandbox without a netns only logs a warning and returns nil. A netns
// already marked closed returns nil without calling Close() again. If
// Close() fails, its error is returned and the closed flag is left unset.
func (s *sandbox) netNsRemove() error {
|
||||||
|
if s.netns == nil {
|
||||||
|
logrus.Warn("no networking namespace")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hold the netns mutex while the closed flag is read and updated.
s.netns.Lock()
|
||||||
|
defer s.netns.Unlock()
|
||||||
|
|
||||||
|
if s.netns.closed {
|
||||||
|
// netNsRemove() can be called multiple
|
||||||
|
// times without returning an error.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.netns.ns.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.netns.closed = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
|
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
|
||||||
var (
|
var (
|
||||||
err error
|
err error
|
||||||
|
|
|
@ -78,6 +78,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
|
||||||
if err := os.RemoveAll(podSandboxDir); err != nil {
|
if err := os.RemoveAll(podSandboxDir); err != nil {
|
||||||
return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err)
|
return nil, fmt.Errorf("failed to remove sandbox %s directory: %v", sb.id, err)
|
||||||
}
|
}
|
||||||
|
if err := sb.netNsRemove(); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
|
||||||
|
}
|
||||||
s.releaseContainerName(podInfraContainer.Name())
|
s.releaseContainerName(podInfraContainer.Name())
|
||||||
s.removeContainer(podInfraContainer)
|
s.removeContainer(podInfraContainer)
|
||||||
sb.infraContainer = nil
|
sb.infraContainer = nil
|
||||||
|
|
|
@ -18,9 +18,9 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// RunPodSandbox creates and runs a pod-level sandbox.
|
// RunPodSandbox creates and runs a pod-level sandbox.
|
||||||
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (*pb.RunPodSandboxResponse, error) {
|
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
|
||||||
logrus.Debugf("RunPodSandboxRequest %+v", req)
|
logrus.Debugf("RunPodSandboxRequest %+v", req)
|
||||||
var processLabel, mountLabel string
|
var processLabel, mountLabel, netNsPath string
|
||||||
// process req.Name
|
// process req.Name
|
||||||
name := req.GetConfig().GetMetadata().GetName()
|
name := req.GetConfig().GetMetadata().GetName()
|
||||||
if name == "" {
|
if name == "" {
|
||||||
|
@ -30,7 +30,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
namespace := req.GetConfig().GetMetadata().GetNamespace()
|
namespace := req.GetConfig().GetMetadata().GetNamespace()
|
||||||
attempt := req.GetConfig().GetMetadata().GetAttempt()
|
attempt := req.GetConfig().GetMetadata().GetAttempt()
|
||||||
|
|
||||||
var err error
|
|
||||||
id, name, err := s.generatePodIDandName(name, namespace, attempt)
|
id, name, err := s.generatePodIDandName(name, namespace, attempt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -235,6 +234,34 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
netNsPath, err = hostNetNsPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Create the sandbox network namespace
|
||||||
|
if err = sb.netNsCreate(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if netnsErr := sb.netNsRemove(); netnsErr != nil {
|
||||||
|
logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
|
||||||
|
}
|
||||||
|
} ()
|
||||||
|
|
||||||
|
// Pass the created namespace path to the runtime
|
||||||
|
err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
netNsPath = sb.netNsPath()
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
|
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
|
||||||
|
@ -267,7 +294,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, labels, annotations, nil, nil, id, false)
|
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -284,11 +311,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
|
|
||||||
// setup the network
|
// setup the network
|
||||||
podNamespace := ""
|
podNamespace := ""
|
||||||
netnsPath, err := container.NetNsPath()
|
if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if err = s.netPlugin.SetUpPod(netnsPath, podNamespace, id, containerName); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
|
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -300,7 +323,7 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &pb.RunPodSandboxResponse{PodSandboxId: &id}
|
resp = &pb.RunPodSandboxResponse{PodSandboxId: &id}
|
||||||
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
|
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,11 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
|
||||||
podInfraContainer.Name(), sb.id, err)
|
podInfraContainer.Name(), sb.id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Close the sandbox networking namespace.
|
||||||
|
if err := sb.netNsRemove(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
containers := sb.containers.List()
|
containers := sb.containers.List()
|
||||||
containers = append(containers, podInfraContainer)
|
containers = append(containers, podInfraContainer)
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ func (s *Server) loadContainer(id string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], labels, annotations, img, &metadata, sb.id, tty)
|
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -106,6 +106,22 @@ func (s *Server) loadContainer(id string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// configNetNsPath returns the path of the first namespace entry in
// spec.Linux.Namespaces whose type is rspec.NetworkNamespace.
//
// It returns an error if that entry has an empty path, or if the spec
// contains no network namespace entry at all.
func configNetNsPath(spec rspec.Spec) (string, error) {
|
||||||
|
for _, ns := range spec.Linux.Namespaces {
|
||||||
|
if ns.Type != rspec.NetworkNamespace {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if ns.Path == "" {
|
||||||
|
return "", fmt.Errorf("empty networking namespace")
|
||||||
|
}
|
||||||
|
|
||||||
|
return ns.Path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("missing networking namespace")
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) loadSandbox(id string) error {
|
func (s *Server) loadSandbox(id string) error {
|
||||||
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
|
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -151,6 +167,22 @@ func (s *Server) loadSandbox(id string) error {
|
||||||
metadata: &metadata,
|
metadata: &metadata,
|
||||||
shmPath: m.Annotations["ocid/shm_path"],
|
shmPath: m.Annotations["ocid/shm_path"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We add a netNS only if we can load a permanent one.
|
||||||
|
// Otherwise, the sandbox will live in the host namespace.
|
||||||
|
netNsPath, err := configNetNsPath(m)
|
||||||
|
if err == nil {
|
||||||
|
netNS, nsErr := netNsGet(netNsPath)
|
||||||
|
// If we can't load the networking namespace
|
||||||
|
// because it's closed, we just set the sb netns
|
||||||
|
// pointer to nil. Otherwise we return an error.
|
||||||
|
if nsErr != nil && nsErr != errSandboxClosedNetNS {
|
||||||
|
return nsErr
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.netns = netNS
|
||||||
|
}
|
||||||
|
|
||||||
s.addSandbox(sb)
|
s.addSandbox(sb)
|
||||||
|
|
||||||
sandboxPath := filepath.Join(s.config.SandboxDir, id)
|
sandboxPath := filepath.Join(s.config.SandboxDir, id)
|
||||||
|
@ -163,7 +195,7 @@ func (s *Server) loadSandbox(id string) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, labels, annotations, nil, nil, id, false)
|
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue