Merge pull request #237 from sameo/topic/sandbox_netns
Enable networking for hypervisor based container runtimes
This commit is contained in:
commit
bd585c2fca
9 changed files with 646 additions and 37 deletions
12
oci/oci.go
12
oci/oci.go
|
@ -18,6 +18,7 @@ import (
|
||||||
|
|
||||||
"github.com/Sirupsen/logrus"
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/kubernetes-incubator/cri-o/utils"
|
"github.com/kubernetes-incubator/cri-o/utils"
|
||||||
|
"github.com/containernetworking/cni/pkg/ns"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||||
|
@ -344,6 +345,7 @@ type Container struct {
|
||||||
annotations fields.Set
|
annotations fields.Set
|
||||||
image *pb.ImageSpec
|
image *pb.ImageSpec
|
||||||
sandbox string
|
sandbox string
|
||||||
|
netns ns.NetNS
|
||||||
terminal bool
|
terminal bool
|
||||||
state *ContainerState
|
state *ContainerState
|
||||||
metadata *pb.ContainerMetadata
|
metadata *pb.ContainerMetadata
|
||||||
|
@ -360,7 +362,7 @@ type ContainerState struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewContainer creates a container object.
|
// NewContainer creates a container object.
|
||||||
func NewContainer(id string, name string, bundlePath string, logPath string, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
|
func NewContainer(id string, name string, bundlePath string, logPath string, netns ns.NetNS, labels map[string]string, annotations map[string]string, image *pb.ImageSpec, metadata *pb.ContainerMetadata, sandbox string, terminal bool) (*Container, error) {
|
||||||
c := &Container{
|
c := &Container{
|
||||||
id: id,
|
id: id,
|
||||||
name: name,
|
name: name,
|
||||||
|
@ -368,6 +370,7 @@ func NewContainer(id string, name string, bundlePath string, logPath string, lab
|
||||||
logPath: logPath,
|
logPath: logPath,
|
||||||
labels: labels,
|
labels: labels,
|
||||||
sandbox: sandbox,
|
sandbox: sandbox,
|
||||||
|
netns: netns,
|
||||||
terminal: terminal,
|
terminal: terminal,
|
||||||
metadata: metadata,
|
metadata: metadata,
|
||||||
annotations: annotations,
|
annotations: annotations,
|
||||||
|
@ -421,7 +424,12 @@ func (c *Container) NetNsPath() (string, error) {
|
||||||
if c.state == nil {
|
if c.state == nil {
|
||||||
return "", fmt.Errorf("container state is not populated")
|
return "", fmt.Errorf("container state is not populated")
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
|
|
||||||
|
if c.netns == nil {
|
||||||
|
return fmt.Sprintf("/proc/%d/ns/net", c.state.Pid), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.netns.Path(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Metadata returns the metadata of the container.
|
// Metadata returns the metadata of the container.
|
||||||
|
|
|
@ -273,14 +273,20 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
||||||
|
|
||||||
logrus.Debugf("pod container state %+v", podInfraState)
|
logrus.Debugf("pod container state %+v", podInfraState)
|
||||||
|
|
||||||
for nsType, nsFile := range map[string]string{
|
ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
|
||||||
"ipc": "ipc",
|
if err := specgen.AddOrReplaceLinuxNamespace("ipc", ipcNsPath); err != nil {
|
||||||
"network": "net",
|
return nil, err
|
||||||
} {
|
}
|
||||||
nsPath := fmt.Sprintf("/proc/%d/ns/%s", podInfraState.Pid, nsFile)
|
|
||||||
if err := specgen.AddOrReplaceLinuxNamespace(nsType, nsPath); err != nil {
|
netNsPath := sb.netNsPath()
|
||||||
return nil, err
|
if netNsPath == "" {
|
||||||
}
|
// The sandbox does not have a permanent namespace,
|
||||||
|
// it's on the host one.
|
||||||
|
netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := specgen.AddOrReplaceLinuxNamespace("network", netNsPath); err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
imageSpec := containerConfig.GetImage()
|
imageSpec := containerConfig.GetImage()
|
||||||
|
@ -336,7 +342,7 @@ func (s *Server) createSandboxContainer(containerID string, containerName string
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
|
container, err := oci.NewContainer(containerID, containerName, containerDir, logPath, sb.netNs(), labels, annotations, imageSpec, metadata, sb.id, containerConfig.GetTty())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,128 @@
|
||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/rand"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/docker/docker/pkg/stringid"
|
"github.com/docker/docker/pkg/stringid"
|
||||||
"github.com/kubernetes-incubator/cri-o/oci"
|
"github.com/kubernetes-incubator/cri-o/oci"
|
||||||
|
"github.com/containernetworking/cni/pkg/ns"
|
||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type sandboxNetNs struct {
|
||||||
|
sync.Mutex
|
||||||
|
ns ns.NetNS
|
||||||
|
symlink *os.File
|
||||||
|
closed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *sandboxNetNs) symlinkCreate(name string) error {
|
||||||
|
b := make([]byte, 4)
|
||||||
|
_, randErr := rand.Reader.Read(b)
|
||||||
|
if randErr != nil {
|
||||||
|
return randErr
|
||||||
|
}
|
||||||
|
|
||||||
|
nsName := fmt.Sprintf("%s-%x", name, b)
|
||||||
|
symlinkPath := filepath.Join(nsRunDir, nsName)
|
||||||
|
|
||||||
|
if err := os.Symlink(ns.ns.Path(), symlinkPath); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := os.Open(symlinkPath)
|
||||||
|
if err != nil {
|
||||||
|
if removeErr := os.RemoveAll(symlinkPath); removeErr != nil {
|
||||||
|
return removeErr
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
ns.symlink = fd
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *sandboxNetNs) symlinkRemove() error {
|
||||||
|
if err := ns.symlink.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.RemoveAll(ns.symlink.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
// isSymbolicLink reports whether path itself (not the file it may point to)
// is a symbolic link. The error from os.Lstat is returned unchanged.
func isSymbolicLink(path string) (bool, error) {
	info, statErr := os.Lstat(path)
	if statErr != nil {
		return false, statErr
	}

	isLink := info.Mode()&os.ModeSymlink != 0

	return isLink, nil
}
|
||||||
|
|
||||||
|
func netNsGet(nspath, name string) (*sandboxNetNs, error) {
|
||||||
|
if err := ns.IsNSorErr(nspath); err != nil {
|
||||||
|
return nil, errSandboxClosedNetNS
|
||||||
|
}
|
||||||
|
|
||||||
|
symlink, symlinkErr := isSymbolicLink(nspath)
|
||||||
|
if symlinkErr != nil {
|
||||||
|
return nil, symlinkErr
|
||||||
|
}
|
||||||
|
|
||||||
|
var resolvedNsPath string
|
||||||
|
if symlink {
|
||||||
|
path, err := os.Readlink(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resolvedNsPath = path
|
||||||
|
} else {
|
||||||
|
resolvedNsPath = nspath
|
||||||
|
}
|
||||||
|
|
||||||
|
netNS, err := ns.GetNS(resolvedNsPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
netNs := &sandboxNetNs{ns: netNS, closed: false,}
|
||||||
|
|
||||||
|
if symlink {
|
||||||
|
fd, err := os.Open(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
netNs.symlink = fd
|
||||||
|
} else {
|
||||||
|
if err := netNs.symlinkCreate(name); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return netNs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func hostNetNsPath() (string, error) {
|
||||||
|
netNS, err := ns.GetCurrentNS()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer netNS.Close()
|
||||||
|
|
||||||
|
return netNS.Path(), nil
|
||||||
|
}
|
||||||
|
|
||||||
type sandbox struct {
|
type sandbox struct {
|
||||||
id string
|
id string
|
||||||
name string
|
name string
|
||||||
|
@ -20,6 +133,7 @@ type sandbox struct {
|
||||||
containers oci.Store
|
containers oci.Store
|
||||||
processLabel string
|
processLabel string
|
||||||
mountLabel string
|
mountLabel string
|
||||||
|
netns *sandboxNetNs
|
||||||
metadata *pb.PodSandboxMetadata
|
metadata *pb.PodSandboxMetadata
|
||||||
shmPath string
|
shmPath string
|
||||||
}
|
}
|
||||||
|
@ -27,10 +141,12 @@ type sandbox struct {
|
||||||
const (
|
const (
|
||||||
podDefaultNamespace = "default"
|
podDefaultNamespace = "default"
|
||||||
defaultShmSize = 64 * 1024 * 1024
|
defaultShmSize = 64 * 1024 * 1024
|
||||||
|
nsRunDir = "/var/run/netns"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
|
errSandboxIDEmpty = errors.New("PodSandboxId should not be empty")
|
||||||
|
errSandboxClosedNetNS = errors.New("PodSandbox networking namespace is closed")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *sandbox) addContainer(c *oci.Container) {
|
func (s *sandbox) addContainer(c *oci.Container) {
|
||||||
|
@ -45,6 +161,77 @@ func (s *sandbox) removeContainer(c *oci.Container) {
|
||||||
s.containers.Delete(c.Name())
|
s.containers.Delete(c.Name())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *sandbox) netNs() ns.NetNS {
|
||||||
|
if s.netns == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.netns.ns
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *sandbox) netNsPath() string {
|
||||||
|
if s.netns == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.netns.symlink.Name()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *sandbox) netNsCreate() error {
|
||||||
|
if s.netns != nil {
|
||||||
|
return fmt.Errorf("net NS already created")
|
||||||
|
}
|
||||||
|
|
||||||
|
netNS, err := ns.NewNS()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.netns = &sandboxNetNs{
|
||||||
|
ns: netNS,
|
||||||
|
closed: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.netns.symlinkCreate(s.name); err != nil {
|
||||||
|
logrus.Warnf("Could not create nentns symlink %v", err)
|
||||||
|
|
||||||
|
if err := s.netns.ns.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *sandbox) netNsRemove() error {
|
||||||
|
if s.netns == nil {
|
||||||
|
logrus.Warn("no networking namespace")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
s.netns.Lock()
|
||||||
|
defer s.netns.Unlock()
|
||||||
|
|
||||||
|
if s.netns.closed {
|
||||||
|
// netNsRemove() can be called multiple
|
||||||
|
// times without returning an error.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.netns.symlinkRemove(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.netns.ns.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.netns.closed = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
|
func (s *Server) generatePodIDandName(name string, namespace string, attempt uint32) (string, string, error) {
|
||||||
var (
|
var (
|
||||||
err error
|
err error
|
||||||
|
|
|
@ -73,6 +73,10 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := sb.netNsRemove(); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to remove networking namespace for sandbox %s: %v", sb.id, err)
|
||||||
|
}
|
||||||
|
|
||||||
// Remove the files related to the sandbox
|
// Remove the files related to the sandbox
|
||||||
podSandboxDir := filepath.Join(s.config.SandboxDir, sb.id)
|
podSandboxDir := filepath.Join(s.config.SandboxDir, sb.id)
|
||||||
if err := os.RemoveAll(podSandboxDir); err != nil {
|
if err := os.RemoveAll(podSandboxDir); err != nil {
|
||||||
|
|
|
@ -17,10 +17,30 @@ import (
|
||||||
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
pb "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func (s *Server) runContainer(container *oci.Container) error {
|
||||||
|
if err := s.runtime.CreateContainer(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.runtime.UpdateStatus(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.runtime.StartContainer(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.runtime.UpdateStatus(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// RunPodSandbox creates and runs a pod-level sandbox.
|
// RunPodSandbox creates and runs a pod-level sandbox.
|
||||||
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (*pb.RunPodSandboxResponse, error) {
|
func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
|
||||||
logrus.Debugf("RunPodSandboxRequest %+v", req)
|
logrus.Debugf("RunPodSandboxRequest %+v", req)
|
||||||
var processLabel, mountLabel string
|
var processLabel, mountLabel, netNsPath string
|
||||||
// process req.Name
|
// process req.Name
|
||||||
name := req.GetConfig().GetMetadata().GetName()
|
name := req.GetConfig().GetMetadata().GetName()
|
||||||
if name == "" {
|
if name == "" {
|
||||||
|
@ -30,7 +50,6 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
namespace := req.GetConfig().GetMetadata().GetNamespace()
|
namespace := req.GetConfig().GetMetadata().GetNamespace()
|
||||||
attempt := req.GetConfig().GetMetadata().GetAttempt()
|
attempt := req.GetConfig().GetMetadata().GetAttempt()
|
||||||
|
|
||||||
var err error
|
|
||||||
id, name, err := s.generatePodIDandName(name, namespace, attempt)
|
id, name, err := s.generatePodIDandName(name, namespace, attempt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -235,6 +254,34 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
netNsPath, err = hostNetNsPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Create the sandbox network namespace
|
||||||
|
if err = sb.netNsCreate(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if netnsErr := sb.netNsRemove(); netnsErr != nil {
|
||||||
|
logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
|
||||||
|
}
|
||||||
|
} ()
|
||||||
|
|
||||||
|
// Pass the created namespace path to the runtime
|
||||||
|
err = g.AddOrReplaceLinuxNamespace("network", sb.netNsPath())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
netNsPath = sb.netNsPath()
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
|
if req.GetConfig().GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
|
||||||
|
@ -267,40 +314,24 @@ func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, labels, annotations, nil, nil, id, false)
|
container, err := oci.NewContainer(containerID, containerName, podSandboxDir, podSandboxDir, sb.netNs(), labels, annotations, nil, nil, id, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
sb.infraContainer = container
|
sb.infraContainer = container
|
||||||
|
|
||||||
if err = s.runtime.CreateContainer(container); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = s.runtime.UpdateStatus(container); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// setup the network
|
// setup the network
|
||||||
podNamespace := ""
|
podNamespace := ""
|
||||||
netnsPath, err := container.NetNsPath()
|
if err = s.netPlugin.SetUpPod(netNsPath, podNamespace, id, containerName); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if err = s.netPlugin.SetUpPod(netnsPath, podNamespace, id, containerName); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
|
return nil, fmt.Errorf("failed to create network for container %s in sandbox %s: %v", containerName, id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = s.runtime.StartContainer(container); err != nil {
|
if err = s.runContainer(container); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = s.runtime.UpdateStatus(container); err != nil {
|
resp = &pb.RunPodSandboxResponse{PodSandboxId: &id}
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
resp := &pb.RunPodSandboxResponse{PodSandboxId: &id}
|
|
||||||
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
|
logrus.Debugf("RunPodSandboxResponse: %+v", resp)
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,11 @@ func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
|
||||||
podInfraContainer.Name(), sb.id, err)
|
podInfraContainer.Name(), sb.id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Close the sandbox networking namespace.
|
||||||
|
if err := sb.netNsRemove(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
containers := sb.containers.List()
|
containers := sb.containers.List()
|
||||||
containers = append(containers, podInfraContainer)
|
containers = append(containers, podInfraContainer)
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ func (s *Server) loadContainer(id string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], labels, annotations, img, &metadata, sb.id, tty)
|
ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations["ocid/log_path"], sb.netNs(), labels, annotations, img, &metadata, sb.id, tty)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -106,6 +106,22 @@ func (s *Server) loadContainer(id string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func configNetNsPath(spec rspec.Spec) (string, error) {
|
||||||
|
for _, ns := range spec.Linux.Namespaces {
|
||||||
|
if ns.Type != rspec.NetworkNamespace {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if ns.Path == "" {
|
||||||
|
return "", fmt.Errorf("empty networking namespace")
|
||||||
|
}
|
||||||
|
|
||||||
|
return ns.Path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("missing networking namespace")
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Server) loadSandbox(id string) error {
|
func (s *Server) loadSandbox(id string) error {
|
||||||
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
|
config, err := ioutil.ReadFile(filepath.Join(s.config.SandboxDir, id, "config.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -151,6 +167,22 @@ func (s *Server) loadSandbox(id string) error {
|
||||||
metadata: &metadata,
|
metadata: &metadata,
|
||||||
shmPath: m.Annotations["ocid/shm_path"],
|
shmPath: m.Annotations["ocid/shm_path"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We add a netNS only if we can load a permanent one.
|
||||||
|
// Otherwise, the sandbox will live in the host namespace.
|
||||||
|
netNsPath, err := configNetNsPath(m)
|
||||||
|
if err == nil {
|
||||||
|
netNS, nsErr := netNsGet(netNsPath, sb.name)
|
||||||
|
// If we can't load the networking namespace
|
||||||
|
// because it's closed, we just set the sb netns
|
||||||
|
// pointer to nil. Otherwise we return an error.
|
||||||
|
if nsErr != nil && nsErr != errSandboxClosedNetNS {
|
||||||
|
return nsErr
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.netns = netNS
|
||||||
|
}
|
||||||
|
|
||||||
s.addSandbox(sb)
|
s.addSandbox(sb)
|
||||||
|
|
||||||
sandboxPath := filepath.Join(s.config.SandboxDir, id)
|
sandboxPath := filepath.Join(s.config.SandboxDir, id)
|
||||||
|
@ -163,7 +195,7 @@ func (s *Server) loadSandbox(id string) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, labels, annotations, nil, nil, id, false)
|
scontainer, err := oci.NewContainer(m.Annotations["ocid/container_id"], cname, sandboxPath, sandboxPath, sb.netNs(), labels, annotations, nil, nil, id, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
34
vendor/src/github.com/containernetworking/cni/pkg/ns/README.md
vendored
Normal file
34
vendor/src/github.com/containernetworking/cni/pkg/ns/README.md
vendored
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
### Namespaces, Threads, and Go
|
||||||
|
On Linux each OS thread can have a different network namespace. Go's thread scheduling model switches goroutines between OS threads based on OS thread load and whether the goroutine would block other goroutines. This can result in a goroutine switching network namespaces without notice and lead to errors in your code.
|
||||||
|
|
||||||
|
### Namespace Switching
|
||||||
|
Switching namespaces with the `ns.Set()` method is not recommended without additional strategies to prevent unexpected namespace changes when your goroutines switch OS threads.
|
||||||
|
|
||||||
|
Go provides the `runtime.LockOSThread()` function to ensure a specific goroutine executes on its current OS thread and prevents any other goroutine from running in that thread until the locked one exits. Careful usage of `LockOSThread()` and goroutines can provide good control over which network namespace a given goroutine executes in.
|
||||||
|
|
||||||
|
For example, you cannot rely on the `ns.Set()` namespace being the current namespace after the `Set()` call unless you do two things. First, the goroutine calling `Set()` must have previously called `LockOSThread()`. Second, you must ensure `runtime.UnlockOSThread()` is not called somewhere in-between. You also cannot rely on the initial network namespace remaining the current network namespace if any other code in your program switches namespaces, unless you have already called `LockOSThread()` in that goroutine. Note that `LockOSThread()` prevents the Go scheduler from optimally scheduling goroutines for best performance, so `LockOSThread()` should only be used in small, isolated goroutines that release the lock quickly.
|
||||||
|
|
||||||
|
### Do() The Recommended Thing
|
||||||
|
The `ns.Do()` method provides control over network namespaces for you by implementing these strategies. All code dependent on a particular network namespace (including the root namespace) should be wrapped in the `ns.Do()` method to ensure the correct namespace is selected for the duration of your code. For example:
|
||||||
|
|
||||||
|
```go
|
||||||
|
targetNs, err := ns.NewNS()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = targetNs.Do(func(hostNs ns.NetNS) error {
|
||||||
|
dummy := &netlink.Dummy{
|
||||||
|
LinkAttrs: netlink.LinkAttrs{
|
||||||
|
Name: "dummy0",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return netlink.LinkAdd(dummy)
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
Note this requirement to wrap every network call is very onerous - any libraries you call might call out to network services such as DNS, and all such calls need to be protected after you call `ns.Do()`. The CNI plugins all exit very soon after calling `ns.Do()` which helps to minimize the problem.
|
||||||
|
|
||||||
|
### Further Reading
|
||||||
|
- https://github.com/golang/go/wiki/LockOSThread
|
||||||
|
- http://morsmachine.dk/go-scheduler
|
||||||
|
- https://github.com/containernetworking/cni/issues/262
|
302
vendor/src/github.com/containernetworking/cni/pkg/ns/ns.go
vendored
Normal file
302
vendor/src/github.com/containernetworking/cni/pkg/ns/ns.go
vendored
Normal file
|
@ -0,0 +1,302 @@
|
||||||
|
// Copyright 2015 CNI authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package ns
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/rand"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NetNS is a handle on a network namespace.
type NetNS interface {
	// Do runs toRun inside this network namespace and then tries to switch
	// back to the namespace that was active before the call. Every OS
	// thread may be in a different network namespace and Go schedules
	// goroutines across threads unpredictably, so code that needs this
	// namespace is only guaranteed to see it while wrapped in Do(). Nothing
	// called from Do() should call runtime.UnlockOSThread(); that raises
	// the risk of running in the wrong namespace. See
	// https://github.com/golang/go/wiki/LockOSThread for further details.
	Do(toRun func(NetNS) error) error

	// Set switches the current network namespace to this one. Because of
	// Go's thread scheduling the caller cannot assume this namespace is
	// still the current one after Set returns; wrap namespace-dependent
	// operations in Do() to get that guarantee.
	Set() error

	// Path returns the filesystem path that represents this namespace.
	Path() string

	// Fd returns a file descriptor that represents this namespace.
	Fd() uintptr

	// Close releases this instance of the namespace. If it was the last
	// user, the namespace is destroyed.
	Close() error
}
|
||||||
|
|
||||||
|
// netNS is the file-backed NetNS implementation of this package.
type netNS struct {
	// file is the opened namespace file.
	file *os.File

	// mounted is set for namespaces returned by NewNS; closed is set by
	// Close once the file has been closed.
	mounted, closed bool
}
|
||||||
|
|
||||||
|
func getCurrentThreadNetNSPath() string {
|
||||||
|
// /proc/self/ns/net returns the namespace of the main thread, not
|
||||||
|
// of whatever thread this goroutine is running on. Make sure we
|
||||||
|
// use the thread's net namespace since the thread is switching around
|
||||||
|
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns an object representing the current OS thread's network namespace
|
||||||
|
func GetCurrentNS() (NetNS, error) {
|
||||||
|
return GetNS(getCurrentThreadNetNSPath())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filesystem magic numbers accepted by IsNSorErr, taken from
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/magic.h

// NSFS_MAGIC is the magic number listed for nsfs.
const NSFS_MAGIC = 0x6e736673

// PROCFS_MAGIC is the magic number listed for procfs.
const PROCFS_MAGIC = 0x9fa0
|
||||||
|
|
||||||
|
// NSPathNotExistErr is returned by IsNSorErr when the namespace path does
// not exist.
type NSPathNotExistErr struct {
	msg string
}

// Error returns the message stored in the error.
func (e NSPathNotExistErr) Error() string {
	return e.msg
}

// NSPathNotNSErr is returned by IsNSorErr when the path exists but sits on
// a filesystem with an unexpected magic number.
type NSPathNotNSErr struct {
	msg string
}

// Error returns the message stored in the error.
func (e NSPathNotNSErr) Error() string {
	return e.msg
}
|
||||||
|
|
||||||
|
func IsNSorErr(nspath string) error {
|
||||||
|
stat := syscall.Statfs_t{}
|
||||||
|
if err := syscall.Statfs(nspath, &stat); err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
err = NSPathNotExistErr{msg: fmt.Sprintf("failed to Statfs %q: %v", nspath, err)}
|
||||||
|
} else {
|
||||||
|
err = fmt.Errorf("failed to Statfs %q: %v", nspath, err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch stat.Type {
|
||||||
|
case PROCFS_MAGIC, NSFS_MAGIC:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return NSPathNotNSErr{msg: fmt.Sprintf("unknown FS magic on %q: %x", nspath, stat.Type)}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns an object representing the namespace referred to by @path
|
||||||
|
func GetNS(nspath string) (NetNS, error) {
|
||||||
|
err := IsNSorErr(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := os.Open(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &netNS{file: fd}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a new persistent network namespace and returns an object
|
||||||
|
// representing that namespace, without switching to it
|
||||||
|
func NewNS() (NetNS, error) {
|
||||||
|
const nsRunDir = "/var/run/netns"
|
||||||
|
|
||||||
|
b := make([]byte, 16)
|
||||||
|
_, err := rand.Reader.Read(b)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to generate random netns name: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = os.MkdirAll(nsRunDir, 0755)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// create an empty file at the mount point
|
||||||
|
nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
|
||||||
|
nsPath := path.Join(nsRunDir, nsName)
|
||||||
|
mountPointFd, err := os.Create(nsPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
mountPointFd.Close()
|
||||||
|
|
||||||
|
// Ensure the mount point is cleaned up on errors; if the namespace
|
||||||
|
// was successfully mounted this will have no effect because the file
|
||||||
|
// is in-use
|
||||||
|
defer os.RemoveAll(nsPath)
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
|
||||||
|
// do namespace work in a dedicated goroutine, so that we can safely
|
||||||
|
// Lock/Unlock OSThread without upsetting the lock/unlock state of
|
||||||
|
// the caller of this function
|
||||||
|
var fd *os.File
|
||||||
|
go (func() {
|
||||||
|
defer wg.Done()
|
||||||
|
runtime.LockOSThread()
|
||||||
|
|
||||||
|
var origNS NetNS
|
||||||
|
origNS, err = GetNS(getCurrentThreadNetNSPath())
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer origNS.Close()
|
||||||
|
|
||||||
|
// create a new netns on the current thread
|
||||||
|
err = unix.Unshare(unix.CLONE_NEWNET)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer origNS.Set()
|
||||||
|
|
||||||
|
// bind mount the new netns from the current thread onto the mount point
|
||||||
|
err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err = os.Open(nsPath)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
})()
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
unix.Unmount(nsPath, unix.MNT_DETACH)
|
||||||
|
return nil, fmt.Errorf("failed to create namespace: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &netNS{file: fd, mounted: true}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) Path() string {
|
||||||
|
return ns.file.Name()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) Fd() uintptr {
|
||||||
|
return ns.file.Fd()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) errorIfClosed() error {
|
||||||
|
if ns.closed {
|
||||||
|
return fmt.Errorf("%q has already been closed", ns.file.Name())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) Close() error {
|
||||||
|
if err := ns.errorIfClosed(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ns.file.Close(); err != nil {
|
||||||
|
return fmt.Errorf("Failed to close %q: %v", ns.file.Name(), err)
|
||||||
|
}
|
||||||
|
ns.closed = true
|
||||||
|
|
||||||
|
if ns.mounted {
|
||||||
|
if err := unix.Unmount(ns.file.Name(), unix.MNT_DETACH); err != nil {
|
||||||
|
return fmt.Errorf("Failed to unmount namespace %s: %v", ns.file.Name(), err)
|
||||||
|
}
|
||||||
|
if err := os.RemoveAll(ns.file.Name()); err != nil {
|
||||||
|
return fmt.Errorf("Failed to clean up namespace %s: %v", ns.file.Name(), err)
|
||||||
|
}
|
||||||
|
ns.mounted = false
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) Do(toRun func(NetNS) error) error {
|
||||||
|
if err := ns.errorIfClosed(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
containedCall := func(hostNS NetNS) error {
|
||||||
|
threadNS, err := GetNS(getCurrentThreadNetNSPath())
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open current netns: %v", err)
|
||||||
|
}
|
||||||
|
defer threadNS.Close()
|
||||||
|
|
||||||
|
// switch to target namespace
|
||||||
|
if err = ns.Set(); err != nil {
|
||||||
|
return fmt.Errorf("error switching to ns %v: %v", ns.file.Name(), err)
|
||||||
|
}
|
||||||
|
defer threadNS.Set() // switch back
|
||||||
|
|
||||||
|
return toRun(hostNS)
|
||||||
|
}
|
||||||
|
|
||||||
|
// save a handle to current network namespace
|
||||||
|
hostNS, err := GetNS(getCurrentThreadNetNSPath())
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Failed to open current namespace: %v", err)
|
||||||
|
}
|
||||||
|
defer hostNS.Close()
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
|
||||||
|
var innerError error
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
runtime.LockOSThread()
|
||||||
|
innerError = containedCall(hostNS)
|
||||||
|
}()
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
return innerError
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ns *netNS) Set() error {
|
||||||
|
if err := ns.errorIfClosed(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, _, err := unix.Syscall(unix.SYS_SETNS, ns.Fd(), uintptr(unix.CLONE_NEWNET), 0); err != 0 {
|
||||||
|
return fmt.Errorf("Error switching to ns %v: %v", ns.file.Name(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithNetNSPath executes the passed closure under the given network
|
||||||
|
// namespace, restoring the original namespace afterwards.
|
||||||
|
func WithNetNSPath(nspath string, toRun func(NetNS) error) error {
|
||||||
|
ns, err := GetNS(nspath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer ns.Close()
|
||||||
|
return ns.Do(toRun)
|
||||||
|
}
|
Loading…
Reference in a new issue