Merge pull request #706 from 14rcole/kpod-stats

Kpod stats
This commit is contained in:
Mrunal Patel 2017-08-17 11:24:38 -07:00 committed by GitHub
commit f82fe5691a
134 changed files with 42027 additions and 33 deletions

View file

@ -55,6 +55,7 @@ It is currently in active development in the Kubernetes community through the [d
| [kpod-push(1)](/docs/kpod-push.1.md) | Push an image to a specified destination |
| [kpod-rmi(1)](/docs/kpod-rmi.1.md) | Removes one or more images |
| [kpod-save(1)](/docs/kpod-save.1.md) | Saves an image to an archive |
| [kpod-stats(1)](/docs/kpod-stats.1.md) | Display a live stream of one or more containers' resource usage statistics |
| [kpod-tag(1)](/docs/kpod-tag.1.md) | Add an additional name to a local image |
| [kpod-umount(1)](/docs/kpod-umount.1.md) | Unmount a working container's root filesystem |
| [kpod-version(1)](/docs/kpod-version.1.md) | Display the Kpod version information |

View file

@ -152,7 +152,7 @@ func outputImages(store storage.Store, images []storage.Image, truncate, digests
Name: name,
Digest: imageDigest,
CreatedAt: createdTime.Format("Jan 2, 2006 15:04"),
Size: libkpodimage.FormattedSize(size),
Size: libkpodimage.FormattedSize(float64(size)),
}
imageOutput = append(imageOutput, params)
}

View file

@ -40,6 +40,9 @@ func main() {
tagCommand,
umountCommand,
versionCommand,
saveCommand,
statsCommand,
loadCommand,
}
app.Flags = []cli.Flag{
cli.StringFlag{

240
cmd/kpod/stats.go Normal file
View file

@ -0,0 +1,240 @@
package main
import (
"encoding/json"
"fmt"
"os"
"strings"
"text/template"
"time"
tm "github.com/buger/goterm"
"github.com/kubernetes-incubator/cri-o/libkpod"
libkpodimage "github.com/kubernetes-incubator/cri-o/libkpod/image"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/pkg/errors"
"github.com/urfave/cli"
)
var printf func(format string, a ...interface{}) (n int, err error)
var println func(a ...interface{}) (n int, err error)
type statsOutputParams struct {
Container string
ID string
CPUPerc string
MemUsage string
MemPerc string
NetIO string
BlockIO string
PIDs uint64
}
var (
statsFlags = []cli.Flag{
cli.BoolFlag{
Name: "all, a",
Usage: "show all containers. Only running containers are shown by default. The default is false",
},
cli.BoolFlag{
Name: "no-stream",
Usage: "disable streaming stats and only pull the first result, default setting is false",
},
cli.StringFlag{
Name: "format",
Usage: "pretty-print container statistics using a Go template",
},
cli.BoolFlag{
Name: "json",
Usage: "output container statistics in json format",
},
}
statsDescription = "display a live stream of one or more containers' resource usage statistics"
statsCommand = cli.Command{
Name: "stats",
Usage: "Display percentage of CPU, memory, network I/O, block I/O and PIDs for one or more containers",
Description: statsDescription,
Flags: statsFlags,
Action: statsCmd,
ArgsUsage: "",
}
)
func statsCmd(c *cli.Context) error {
config, err := getConfig(c)
if err != nil {
return errors.Wrapf(err, "could not read config")
}
containerServer, err := libkpod.New(config)
if err != nil {
return errors.Wrapf(err, "could not create container server")
}
err = containerServer.Update()
if err != nil {
return errors.Wrapf(err, "could not update list of containers")
}
times := -1
if c.Bool("no-stream") {
times = 1
}
statsChan := make(chan []*libkpod.ContainerStats)
// iterate over the channel until it is closed
go func() {
// print using goterm
printf = tm.Printf
println = tm.Println
for stats := range statsChan {
// Continually refresh statistics
tm.Clear()
tm.MoveCursor(1, 1)
outputStats(stats, c.String("format"), c.Bool("json"))
tm.Flush()
time.Sleep(time.Second)
}
}()
return getStats(containerServer, c.Args(), c.Bool("all"), statsChan, times)
}
func getStats(server *libkpod.ContainerServer, args []string, all bool, statsChan chan []*libkpod.ContainerStats, times int) error {
ctrs, err := server.ListContainers(isRunning, ctrInList(args))
if err != nil {
return err
}
containerStats := map[string]*libkpod.ContainerStats{}
for _, ctr := range ctrs {
initialStats, err := server.GetContainerStats(ctr, &libkpod.ContainerStats{})
if err != nil {
return err
}
containerStats[ctr.ID()] = initialStats
}
step := 1
if times == -1 {
times = 1
step = 0
}
for i := 0; i < times; i += step {
reportStats := []*libkpod.ContainerStats{}
for _, ctr := range ctrs {
id := ctr.ID()
if _, ok := containerStats[ctr.ID()]; !ok {
initialStats, err := server.GetContainerStats(ctr, &libkpod.ContainerStats{})
if err != nil {
return err
}
containerStats[id] = initialStats
}
stats, err := server.GetContainerStats(ctr, containerStats[id])
if err != nil {
return err
}
// replace the previous measurement with the current one
containerStats[id] = stats
reportStats = append(reportStats, stats)
}
statsChan <- reportStats
err := server.Update()
if err != nil {
return err
}
ctrs, err = server.ListContainers(isRunning, ctrInList(args))
if err != nil {
return err
}
}
return nil
}
func outputStats(stats []*libkpod.ContainerStats, format string, json bool) error {
if format == "" {
outputStatsHeader()
}
if json {
return outputStatsAsJSON(stats)
}
var err error
for _, s := range stats {
if format == "" {
outputStatsUsingFormatString(s)
} else {
params := getStatsOutputParams(s)
err2 := outputStatsUsingTemplate(format, params)
if err2 != nil {
err = errors.Wrapf(err, err2.Error())
}
}
}
return err
}
func outputStatsHeader() {
printf("%-64s %-16s %-32s %-16s %-24s %-24s %s\n", "CONTAINER", "CPU %", "MEM USAGE / MEM LIMIT", "MEM %", "NET I/O", "BLOCK I/O", "PIDS")
}
func outputStatsUsingFormatString(stats *libkpod.ContainerStats) {
printf("%-64s %-16s %-32s %-16s %-24s %-24s %d\n", stats.Container, floatToPercentString(stats.CPU), combineHumanValues(stats.MemUsage, stats.MemLimit), floatToPercentString(stats.MemPerc), combineHumanValues(stats.NetInput, stats.NetOutput), combineHumanValues(stats.BlockInput, stats.BlockOutput), stats.PIDs)
}
func combineHumanValues(a, b uint64) string {
return fmt.Sprintf("%s / %s", libkpodimage.FormattedSize(float64(a)), libkpodimage.FormattedSize(float64(b)))
}
func floatToPercentString(f float64) string {
return fmt.Sprintf("%.2f %s", f, "%")
}
func getStatsOutputParams(stats *libkpod.ContainerStats) statsOutputParams {
return statsOutputParams{
Container: stats.Container,
ID: stats.Container,
CPUPerc: floatToPercentString(stats.CPU),
MemUsage: combineHumanValues(stats.MemUsage, stats.MemLimit),
MemPerc: floatToPercentString(stats.MemPerc),
NetIO: combineHumanValues(stats.NetInput, stats.NetOutput),
BlockIO: combineHumanValues(stats.BlockInput, stats.BlockOutput),
PIDs: stats.PIDs,
}
}
func outputStatsUsingTemplate(format string, params statsOutputParams) error {
tmpl, err := template.New("stats").Parse(format)
if err != nil {
return errors.Wrapf(err, "template parsing error")
}
err = tmpl.Execute(os.Stdout, params)
if err != nil {
return err
}
println()
return nil
}
func outputStatsAsJSON(stats []*libkpod.ContainerStats) error {
s, err := json.Marshal(stats)
if err != nil {
return err
}
println(s)
return nil
}
func isRunning(ctr *oci.Container) bool {
return ctr.State().Status == "running"
}
func ctrInList(idsOrNames []string) func(ctr *oci.Container) bool {
if len(idsOrNames) == 0 {
return func(*oci.Container) bool { return true }
}
return func(ctr *oci.Container) bool {
for _, idOrName := range idsOrNames {
if strings.HasPrefix(ctr.ID(), idOrName) || strings.HasSuffix(ctr.Name(), idOrName) {
return true
}
}
return false
}
}

View file

@ -219,6 +219,25 @@ _kpod_rmi() {
esac
}
_kpod_stats() {
local boolean_options="
--help
--all
-a
--no-stream
--format
"
case "$cur" in
-*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
;;
*)
__kpod_list_containers
;;
esac
}
kpod_tag() {
local options_with_args="
"
@ -304,6 +323,7 @@ _kpod_kpod() {
pull
push
rmi
stats
tag
umount
unmount

37
docs/kpod-stats.1.md Normal file
View file

@ -0,0 +1,37 @@
% kpod(1) kpod-stats - Display a live stream of 1 or more containers' resource usage statistics
% Ryan Cole
# kpod-stats "1" "July 2017" "kpod"
## NAME
kpod-stats - Display a live stream of 1 or more containers' resource usage statistics
## SYNOPSIS
**kpod** **stats** [*options* [...]] [container]
## DESCRIPTION
Display a live stream of one or more containers' resource usage statistics
## OPTIONS
**--all, -a**
Show all containers. Only running containers are shown by default
**--no-stream**
Disable streaming stats and only pull the first result, default setting is false
**--format="TEMPLATE"**
Pretty-print images using a Go template
## EXAMPLE
TODO
## SEE ALSO
kpod(1)
## HISTORY
July 2017, Originally compiled by Ryan Cole <rycole@redhat.com>

18446
kpod-images.json Normal file

File diff suppressed because it is too large Load diff

View file

@ -17,6 +17,7 @@ import (
"github.com/kubernetes-incubator/cri-o/pkg/annotations"
"github.com/kubernetes-incubator/cri-o/pkg/registrar"
"github.com/kubernetes-incubator/cri-o/pkg/storage"
"github.com/opencontainers/runc/libcontainer"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
@ -195,6 +196,12 @@ func (c *ContainerServer) Update() error {
if _, ok := oldPodContainers[id]; !ok {
// this container's ID wasn't in the updated list -> removed
removedPodContainers[id] = id
} else {
ctr := c.GetContainer(id)
if ctr != nil {
// if the container exists, update its state
c.ContainerStateFromDisk(c.GetContainer(id))
}
}
})
for removedPodContainer := range removedPodContainers {
@ -583,13 +590,31 @@ func (c *ContainerServer) RemoveContainer(ctr *oci.Container) {
c.state.containers.Delete(ctr.ID())
}
// ListContainers returns a list of all containers stored by the server state
func (c *ContainerServer) ListContainers() []*oci.Container {
// listContainers returns a list of all containers stored by the server state
func (c *ContainerServer) listContainers() []*oci.Container {
c.stateLock.Lock()
defer c.stateLock.Unlock()
return c.state.containers.List()
}
// ListContainers returns a list of all containers stored by the server state
// that match the given filter function
func (c *ContainerServer) ListContainers(filters ...func(*oci.Container) bool) ([]*oci.Container, error) {
containers := c.listContainers()
if len(filters) == 0 {
return containers, nil
}
filteredContainers := make([]*oci.Container, 0, len(containers))
for _, container := range containers {
for _, filter := range filters {
if filter(container) {
filteredContainers = append(filteredContainers, container)
}
}
}
return filteredContainers, nil
}
// AddSandbox adds a sandbox to the sandbox state store
func (c *ContainerServer) AddSandbox(sb *sandbox.Sandbox) {
c.stateLock.Lock()
@ -641,3 +666,20 @@ func (c *ContainerServer) ListSandboxes() []*sandbox.Sandbox {
return sbArray
}
// LibcontainerStats gets the stats for the container with the given id from runc/libcontainer
func (c *ContainerServer) LibcontainerStats(ctr *oci.Container) (*libcontainer.Stats, error) {
// TODO: make this not hardcoded
// was: c.runtime.Path(ociContainer) but that returns /usr/bin/runc - how do we get /run/runc?
// runroot is /var/run/runc
// Hardcoding probably breaks ClearContainers compatibility
factory, err := loadFactory("/run/runc")
if err != nil {
return nil, err
}
container, err := factory.Load(ctr.ID())
if err != nil {
return nil, err
}
return container.Stats()
}

View file

@ -161,16 +161,15 @@ func MatchesReference(name, argName string) bool {
}
// FormattedSize returns a human-readable formatted size for the image
func FormattedSize(size int64) string {
func FormattedSize(size float64) string {
suffixes := [5]string{"B", "KB", "MB", "GB", "TB"}
count := 0
formattedSize := float64(size)
for formattedSize >= 1024 && count < 4 {
formattedSize /= 1024
for size >= 1024 && count < 4 {
size /= 1024
count++
}
return fmt.Sprintf("%.4g %s", formattedSize, suffixes[count])
return fmt.Sprintf("%.4g %s", size, suffixes[count])
}
// FindImage searches for a *storage.Image with a matching the given name or ID in the given store.

111
libkpod/stats.go Normal file
View file

@ -0,0 +1,111 @@
package libkpod
import (
"path/filepath"
"syscall"
"time"
"strings"
"github.com/kubernetes-incubator/cri-o/oci"
"github.com/opencontainers/runc/libcontainer"
)
// ContainerStats contains the statistics information for a running container
type ContainerStats struct {
Container string
CPU float64
cpuNano uint64
systemNano uint64
MemUsage uint64
MemLimit uint64
MemPerc float64
NetInput uint64
NetOutput uint64
BlockInput uint64
BlockOutput uint64
PIDs uint64
}
// GetContainerStats gets the running stats for a given container
func (c *ContainerServer) GetContainerStats(ctr *oci.Container, previousStats *ContainerStats) (*ContainerStats, error) {
previousCPU := previousStats.cpuNano
previousSystem := previousStats.systemNano
libcontainerStats, err := c.LibcontainerStats(ctr)
if err != nil {
return nil, err
}
cgroupStats := libcontainerStats.CgroupStats
stats := new(ContainerStats)
stats.Container = ctr.ID()
stats.CPU = calculateCPUPercent(libcontainerStats, previousCPU, previousSystem)
stats.MemUsage = cgroupStats.MemoryStats.Usage.Usage
stats.MemLimit = getMemLimit(cgroupStats.MemoryStats.Usage.Limit)
stats.MemPerc = float64(stats.MemUsage) / float64(stats.MemLimit)
stats.PIDs = cgroupStats.PidsStats.Current
stats.BlockInput, stats.BlockOutput = calculateBlockIO(libcontainerStats)
stats.NetInput, stats.NetOutput = getContainerNetIO(libcontainerStats)
return stats, nil
}
func loadFactory(root string) (libcontainer.Factory, error) {
abs, err := filepath.Abs(root)
if err != nil {
return nil, err
}
cgroupManager := libcontainer.Cgroupfs
return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(""))
}
// getMemory limit returns the memory limit for a given cgroup
// If the configured memory limit is larger than the total memory on the sys, the
// physical system memory size is returned
func getMemLimit(cgroupLimit uint64) uint64 {
si := &syscall.Sysinfo_t{}
err := syscall.Sysinfo(si)
if err != nil {
return cgroupLimit
}
physicalLimit := uint64(si.Totalram)
if cgroupLimit > physicalLimit {
return physicalLimit
}
return cgroupLimit
}
// Returns the total number of bytes transmitted and received for the given container stats
func getContainerNetIO(stats *libcontainer.Stats) (received uint64, transmitted uint64) {
for _, iface := range stats.Interfaces {
received += iface.RxBytes
transmitted += iface.TxBytes
}
return
}
func calculateCPUPercent(stats *libcontainer.Stats, previousCPU, previousSystem uint64) float64 {
var (
cpuPercent = 0.0
cpuDelta = float64(stats.CgroupStats.CpuStats.CpuUsage.TotalUsage - previousCPU)
systemDelta = float64(uint64(time.Now().UnixNano()) - previousSystem)
)
if systemDelta > 0.0 && cpuDelta > 0.0 {
// gets a ratio of container cpu usage total, multiplies it by the number of cores (4 cores running
// at 100% utilization should be 400% utilization), and multiplies that by 100 to get a percentage
cpuPercent = (cpuDelta / systemDelta) * float64(len(stats.CgroupStats.CpuStats.CpuUsage.PercpuUsage)) * 100
}
return cpuPercent
}
func calculateBlockIO(stats *libcontainer.Stats) (read uint64, write uint64) {
for _, blkIOEntry := range stats.CgroupStats.BlkioStats.IoServiceBytesRecursive {
switch strings.ToLower(blkIOEntry.Op) {
case "read":
read += blkIOEntry.Value
case "write":
write += blkIOEntry.Value
}
}
return
}

View file

@ -31,36 +31,37 @@ func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersReque
logrus.Debugf("ListContainersRequest %+v", req)
var ctrs []*pb.Container
filter := req.Filter
ctrList := s.ContainerServer.ListContainers()
ctrList, err := s.ContainerServer.ListContainers()
if err != nil {
return nil, err
}
// Filter using container id and pod id first.
if filter != nil {
if filter.Id != "" {
id, err := s.CtrIDIndex().Get(filter.Id)
if err != nil {
return nil, err
}
c := s.ContainerServer.GetContainer(id)
if c != nil {
if filter.PodSandboxId != "" {
if c.Sandbox() == filter.PodSandboxId {
ctrList = []*oci.Container{c}
} else {
ctrList = []*oci.Container{}
}
} else {
ctrList = []*oci.Container{c}
}
}
} else {
if filter.Id != "" {
id, err := s.CtrIDIndex().Get(filter.Id)
if err != nil {
return nil, err
}
c := s.ContainerServer.GetContainer(id)
if c != nil {
if filter.PodSandboxId != "" {
pod := s.ContainerServer.GetSandbox(filter.PodSandboxId)
if pod == nil {
ctrList = []*oci.Container{}
if c.Sandbox() == filter.PodSandboxId {
ctrList = []*oci.Container{c}
} else {
ctrList = pod.Containers().List()
ctrList = []*oci.Container{}
}
} else {
ctrList = []*oci.Container{c}
}
}
} else {
if filter.PodSandboxId != "" {
pod := s.ContainerServer.GetSandbox(filter.PodSandboxId)
if pod == nil {
ctrList = []*oci.Container{}
} else {
ctrList = pod.Containers().List()
}
}
}

107
test/kpod_stats.bats Normal file
View file

@ -0,0 +1,107 @@
#!/usr/bin/env bats
load helpers
ROOT="$TESTDIR/crio"
RUNROOT="$TESTDIR/crio-run"
KPOD_OPTIONS="--root $ROOT --runroot $RUNROOT $STORAGE_OPTS"
function teardown() {
cleanup_test
}
@test "stats single output" {
start_crio
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run crioctl ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ${KPOD_BINARY} $KPOD_OPTIONS stats --no-stream "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_crio
}
@test "stats does not output stopped container" {
start_crio
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ${KPOD_BINARY} $KPOD_OPTIONS stats --no-stream
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_crio
}
@test "stats outputs stopped container with all flag" {
start_crio
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run ${KPOD_BINARY} $KPOD_OPTIONS stats --no-stream --all
echo "$output"
[ "$status" -eq 0 ]
cleanup_ctrs
cleanup_pods
stop_crio
}
@test "stats output only id" {
start_crio
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run crioctl ctr start --id "$ctr_id"
echo "$output"
[ "$status" -eq 0 ]
run ${KPOD_BINARY} $KPOD_OPTIONS stats --no-stream --format {{.ID}} "$ctr_id"
[ "$status" -eq 0 ]
# once ps is implemented, run ps -q and see if that equals the output from above
cleanup_ctrs
cleanup_pods
stop_crio
}
@test "stats streaming output" {
start_crio
run crioctl pod run --config "$TESTDATA"/sandbox_config.json
echo "$output"
[ "$status" -eq 0 ]
pod_id="$output"
run crioctl ctr create --config "$TESTDATA"/container_redis.json --pod "$pod_id"
echo "$output"
[ "$status" -eq 0 ]
ctr_id="$output"
run timeout 5s bash -c "${KPOD_BINARY} $KPOD_OPTIONS stats --all"
echo "$output"
[ "$status" -eq 124 ] #124 is the status set by timeout when it has to kill the command at the end of the given time
cleanup_ctrs
cleanup_pods
stop_crio
}

View file

@ -14,6 +14,9 @@ github.com/opencontainers/selinux v1.0.0-rc1
github.com/opencontainers/go-digest v1.0.0-rc0
github.com/opencontainers/runtime-tools 6bcd3b417fd6962ea04dafdbc2c07444e750572d
github.com/opencontainers/runc 45bde006ca8c90e089894508708bcf0e2cdf9e13
github.com/mrunalp/fileutils master
github.com/vishvananda/netlink master
github.com/vishvananda/netns master
github.com/opencontainers/image-spec v1.0.0
github.com/opencontainers/runtime-spec v1.0.0
github.com/juju/ratelimit acf38b000a03e4ab89e40f20f1e548f4e6ac7f72
@ -81,3 +84,4 @@ github.com/hpcloud/tail v1.0.0
gopkg.in/fsnotify.v1 v1.4.2
gopkg.in/tomb.v1 v1
github.com/fatih/camelcase f6a740d52f961c60348ebb109adde9f4635d7540
github.com/buger/goterm 2f8dfbc7dbbff5dd1d391ed91482c24df243b2d3

119
vendor/github.com/buger/goterm/README.md generated vendored Normal file
View file

@ -0,0 +1,119 @@
## Description
This library provides basic building blocks for building advanced console UIs.
Initially created for [Gor](http://github.com/buger/gor).
Full API documentation: http://godoc.org/github.com/buger/goterm
## Basic usage
Full screen console app, printing current time:
```go
import (
tm "github.com/buger/goterm"
"time"
)
func main() {
tm.Clear() // Clear current screen
for {
// By moving cursor to top-left position we ensure that console output
// will be overwritten each time, instead of adding new.
tm.MoveCursor(1,1)
tm.Println("Current Time:", time.Now().Format(time.RFC1123))
tm.Flush() // Call it every time at the end of rendering
time.Sleep(time.Second)
}
}
```
This can be seen in [examples/time_example.go](examples/time_example.go). To
run it yourself, go into your `$GOPATH/src/github.com/buger/goterm` directory
and run `go run ./examples/time_example.go`
Print red bold message on white background:
```go
tm.Println(tm.Background(tm.Color(tm.Bold("Important header"), tm.RED), tm.WHITE))
```
Create box and move it to center of the screen:
```go
tm.Clear()
// Create Box with 30% width of current screen, and height of 20 lines
box := tm.NewBox(30|tm.PCT, 20, 0)
// Add some content to the box
// Note that you can add ANY content, even tables
fmt.Fprint(box, "Some box content")
// Move Box to approx center of the screen
tm.Print(tm.MoveTo(box.String(), 40|tm.PCT, 40|tm.PCT))
tm.Flush()
```
This can be found in [examples/box_example.go](examples/box_example.go).
Draw table:
```go
// Based on http://golang.org/pkg/text/tabwriter
totals := tm.NewTable(0, 10, 5, ' ', 0)
fmt.Fprintf(totals, "Time\tStarted\tActive\tFinished\n")
fmt.Fprintf(totals, "%s\t%d\t%d\t%d\n", "All", started, started-finished, finished)
tm.Println(totals)
tm.Flush()
```
This can be found in [examples/table_example.go](examples/table_example.go).
## Line charts
Chart example:
![screen shot 2013-07-09 at 5 05 37 pm](https://f.cloud.github.com/assets/14009/767676/e3dd35aa-e887-11e2-9cd2-f6451eb26adc.png)
```go
import (
tm "github.com/buger/goterm"
)
chart := tm.NewLineChart(100, 20)
data := new(tm.DataTable)
data.addColumn("Time")
data.addColumn("Sin(x)")
data.addColumn("Cos(x+1)")
for i := 0.1; i < 10; i += 0.1 {
data.addRow(i, math.Sin(i), math.Cos(i+1))
}
tm.Println(chart.Draw(data))
```
This can be found in [examples/chart_example.go](examples/chart_example.go).
Drawing 2 separate graphs in different scales. Each graph have its own Y axe.
```go
chart.Flags = tm.DRAW_INDEPENDENT
```
Drawing graph with relative scale (Grapwh draw starting from min value instead of zero)
```go
chart.Flags = tm.DRAW_RELATIVE
```

122
vendor/github.com/buger/goterm/box.go generated vendored Normal file
View file

@ -0,0 +1,122 @@
package goterm
import (
"bytes"
"strings"
)
const DEFAULT_BORDER = "- │ ┌ ┐ └ ┘"
// Box allows you to create independent parts of screen, with its own buffer and borders.
// Can be used for creating modal windows
//
// Generates boxes likes this:
// ┌--------┐
// │hello │
// │world │
// │ │
// └--------┘
//
type Box struct {
Buf *bytes.Buffer
Width int
Height int
// To get even padding: PaddingX ~= PaddingY*4
PaddingX int
PaddingY int
// Should contain 6 border pieces separated by spaces
//
// Example border:
// "- │ ┌ ┐ └ ┘"
Border string
Flags int // Not used now
}
// Create new Box.
// Width and height can be relative:
//
// // Create box with 50% with of current screen and 10 lines height
// box := tm.NewBox(50|tm.PCT, 10, 0)
//
func NewBox(width, height int, flags int) *Box {
width, height = GetXY(width, height)
box := new(Box)
box.Buf = new(bytes.Buffer)
box.Width = width
box.Height = height
box.Border = DEFAULT_BORDER
box.PaddingX = 1
box.PaddingY = 0
box.Flags = flags
return box
}
func (b *Box) Write(p []byte) (int, error) {
return b.Buf.Write(p)
}
// Render Box
func (b *Box) String() (out string) {
borders := strings.Split(b.Border, " ")
lines := strings.Split(b.Buf.String(), "\n")
// Border + padding
prefix := borders[1] + strings.Repeat(" ", b.PaddingX)
suffix := strings.Repeat(" ", b.PaddingX) + borders[1]
offset := b.PaddingY + 1 // 1 is border width
// Content width without borders and padding
contentWidth := b.Width - (b.PaddingX+1)*2
for y := 0; y < b.Height; y++ {
var line string
switch {
// Draw borders for first line
case y == 0:
line = borders[2] + strings.Repeat(borders[0], b.Width-2) + borders[3]
// Draw borders for last line
case y == (b.Height - 1):
line = borders[4] + strings.Repeat(borders[0], b.Width-2) + borders[5]
// Draw top and bottom padding
case y <= b.PaddingY || y >= (b.Height-b.PaddingY):
line = borders[1] + strings.Repeat(" ", b.Width-2) + borders[1]
// Render content
default:
if len(lines) > y-offset {
line = lines[y-offset]
} else {
line = ""
}
if len(line) > contentWidth-1 {
// If line is too large limit it
line = line[0:contentWidth]
} else {
// If line is too small enlarge it by adding spaces
line = line + strings.Repeat(" ", contentWidth-len(line))
}
line = prefix + line + suffix
}
// Don't add newline for last element
if y != b.Height-1 {
line = line + "\n"
}
out += line
}
return out
}

328
vendor/github.com/buger/goterm/plot.go generated vendored Normal file
View file

@ -0,0 +1,328 @@
package goterm
import (
"fmt"
"math"
"strings"
)
const (
AXIS_LEFT = iota
AXIS_RIGHT
)
const (
DRAW_INDEPENDENT = 1 << iota
DRAW_RELATIVE
)
type DataTable struct {
columns []string
rows [][]float64
}
func (d *DataTable) AddColumn(name string) {
d.columns = append(d.columns, name)
}
func (d *DataTable) AddRow(elms ...float64) {
d.rows = append(d.rows, elms)
}
type Chart interface {
Draw(data DataTable, flags int) string
}
type LineChart struct {
Buf []string
chartBuf []string
data *DataTable
Width int
Height int
chartHeight int
chartWidth int
paddingX int
paddingY int
Flags int
}
func genBuf(size int) []string {
buf := make([]string, size)
for i := 0; i < size; i++ {
buf[i] = " "
}
return buf
}
// Format float
func ff(num interface{}) string {
return fmt.Sprintf("%.1f", num)
}
func NewLineChart(width, height int) *LineChart {
chart := new(LineChart)
chart.Width = width
chart.Height = height
chart.Buf = genBuf(width * height)
// axis lines + axies text
chart.paddingY = 2
return chart
}
func (c *LineChart) DrawAxes(maxX, minX, maxY, minY float64, index int) {
side := AXIS_LEFT
if c.Flags&DRAW_INDEPENDENT != 0 {
if index%2 == 0 {
side = AXIS_RIGHT
}
c.DrawLine(c.paddingX-1, 1, c.Width-c.paddingX, 1, "-")
} else {
c.DrawLine(c.paddingX-1, 1, c.Width-1, 1, "-")
}
if side == AXIS_LEFT {
c.DrawLine(c.paddingX-1, 1, c.paddingX-1, c.Height-1, "│")
} else {
c.DrawLine(c.Width-c.paddingX, 1, c.Width-c.paddingX, c.Height-1, "│")
}
left := 0
if side == AXIS_RIGHT {
left = c.Width - c.paddingX + 1
}
if c.Flags&DRAW_RELATIVE != 0 {
c.writeText(ff(minY), left, 1)
} else {
if minY > 0 {
c.writeText("0", left, 1)
} else {
c.writeText(ff(minY), left, 1)
}
}
c.writeText(ff(maxY), left, c.Height-1)
c.writeText(ff(minX), c.paddingX, 0)
x_col := c.data.columns[0]
c.writeText(c.data.columns[0], c.Width/2-len(x_col)/2, 1)
if c.Flags&DRAW_INDEPENDENT != 0 || len(c.data.columns) < 3 {
col := c.data.columns[index]
for idx, char := range strings.Split(col, "") {
start_from := c.Height/2 + len(col)/2 - idx
if side == AXIS_LEFT {
c.writeText(char, c.paddingX-1, start_from)
} else {
c.writeText(char, c.Width-c.paddingX, start_from)
}
}
}
if c.Flags&DRAW_INDEPENDENT != 0 {
c.writeText(ff(maxX), c.Width-c.paddingX-len(ff(maxX)), 0)
} else {
c.writeText(ff(maxX), c.Width-len(ff(maxX)), 0)
}
}
func (c *LineChart) writeText(text string, x, y int) {
coord := y*c.Width + x
for idx, char := range strings.Split(text, "") {
c.Buf[coord+idx] = char
}
}
func (c *LineChart) Draw(data *DataTable) (out string) {
var scaleY, scaleX float64
c.data = data
if c.Flags&DRAW_INDEPENDENT != 0 && len(data.columns) > 3 {
fmt.Println("Error: Can't use DRAW_INDEPENDENT for more then 2 graphs")
return ""
}
charts := len(data.columns) - 1
prevPoint := [2]int{-1, -1}
maxX, minX, maxY, minY := getBoundaryValues(data, -1)
c.paddingX = int(math.Max(float64(len(ff(minY))), float64(len(ff(maxY))))) + 1
c.chartHeight = c.Height - c.paddingY
if c.Flags&DRAW_INDEPENDENT != 0 {
c.chartWidth = c.Width - 2*c.paddingX
} else {
c.chartWidth = c.Width - c.paddingX - 1
}
scaleX = float64(c.chartWidth) / (maxX - minX)
if c.Flags&DRAW_RELATIVE != 0 || minY < 0 {
scaleY = float64(c.chartHeight) / (maxY - minY)
} else {
scaleY = float64(c.chartHeight) / maxY
}
for i := 1; i < charts+1; i++ {
if c.Flags&DRAW_INDEPENDENT != 0 {
maxX, minX, maxY, minY = getBoundaryValues(data, i)
scaleX = float64(c.chartWidth-1) / (maxX - minX)
scaleY = float64(c.chartHeight) / maxY
if c.Flags&DRAW_RELATIVE != 0 || minY < 0 {
scaleY = float64(c.chartHeight) / (maxY - minY)
}
}
symbol := Color("•", i)
c_data := getChartData(data, i)
for _, point := range c_data {
x := int((point[0]-minX)*scaleX) + c.paddingX
y := int((point[1])*scaleY) + c.paddingY
if c.Flags&DRAW_RELATIVE != 0 || minY < 0 {
y = int((point[1]-minY)*scaleY) + c.paddingY
}
if prevPoint[0] == -1 {
prevPoint[0] = x
prevPoint[1] = y
}
if prevPoint[0] <= x {
c.DrawLine(prevPoint[0], prevPoint[1], x, y, symbol)
}
prevPoint[0] = x
prevPoint[1] = y
}
c.DrawAxes(maxX, minX, maxY, minY, i)
}
for row := c.Height - 1; row >= 0; row-- {
out += strings.Join(c.Buf[row*c.Width:(row+1)*c.Width], "") + "\n"
}
return
}
func (c *LineChart) DrawLine(x0, y0, x1, y1 int, symbol string) {
drawLine(x0, y0, x1, y1, func(x, y int) {
coord := y*c.Width + x
if coord > 0 && coord < len(c.Buf) {
c.Buf[coord] = symbol
}
})
}
func getBoundaryValues(data *DataTable, index int) (maxX, minX, maxY, minY float64) {
maxX = data.rows[0][0]
minX = data.rows[0][0]
maxY = data.rows[0][1]
minY = data.rows[0][1]
for _, r := range data.rows {
maxX = math.Max(maxX, r[0])
minX = math.Min(minX, r[0])
for idx, c := range r {
if idx > 0 {
if index == -1 || index == idx {
maxY = math.Max(maxY, c)
minY = math.Min(minY, c)
}
}
}
}
if maxY > 0 {
maxY = maxY * 1.1
} else {
maxY = maxY * 0.9
}
if minY > 0 {
minY = minY * 0.9
} else {
minY = minY * 1.1
}
return
}
// DataTable can contain data for multiple graphs, we need to extract only 1
func getChartData(data *DataTable, index int) (out [][]float64) {
for _, r := range data.rows {
out = append(out, []float64{r[0], r[index]})
}
return
}
// Algorithm for drawing line between two points
//
// http://en.wikipedia.org/wiki/Bresenham's_line_algorithm
func drawLine(x0, y0, x1, y1 int, plot func(int, int)) {
dx := x1 - x0
if dx < 0 {
dx = -dx
}
dy := y1 - y0
if dy < 0 {
dy = -dy
}
var sx, sy int
if x0 < x1 {
sx = 1
} else {
sx = -1
}
if y0 < y1 {
sy = 1
} else {
sy = -1
}
err := dx - dy
for {
plot(x0, y0)
if x0 == x1 && y0 == y1 {
break
}
e2 := 2 * err
if e2 > -dy {
err -= dy
x0 += sx
}
if e2 < dx {
err += dx
y0 += sy
}
}
}

34
vendor/github.com/buger/goterm/table.go generated vendored Normal file
View file

@ -0,0 +1,34 @@
package goterm
import (
"bytes"
"text/tabwriter"
)
// Tabwriter with own buffer:
//
// totals := tm.NewTable(0, 10, 5, ' ', 0)
// fmt.Fprintf(totals, "Time\tStarted\tActive\tFinished\n")
// fmt.Fprintf(totals, "%s\t%d\t%d\t%d\n", "All", started, started-finished, finished)
// tm.Println(totals)
//
// Based on http://golang.org/pkg/text/tabwriter
type Table struct {
tabwriter.Writer
Buf *bytes.Buffer
}
// Same as here http://golang.org/pkg/text/tabwriter/#Writer.Init
func NewTable(minwidth, tabwidth, padding int, padchar byte, flags uint) *Table {
tbl := new(Table)
tbl.Buf = new(bytes.Buffer)
tbl.Init(tbl.Buf, minwidth, tabwidth, padding, padchar, flags)
return tbl
}
func (t *Table) String() string {
t.Flush()
return t.Buf.String()
}

258
vendor/github.com/buger/goterm/terminal.go generated vendored Normal file
View file

@ -0,0 +1,258 @@
// Provides basic bulding blocks for advanced console UI
//
// Coordinate system:
//
// 1/1---X---->
// |
// Y
// |
// v
//
// Documentation for ANSI codes: http://en.wikipedia.org/wiki/ANSI_escape_code#Colors
//
// Inspired by: http://www.darkcoding.net/software/pretty-command-line-console-output-on-unix-in-python-and-go-lang/
package goterm
import (
"bufio"
"bytes"
"fmt"
"os"
"strings"
)
// Reset all custom styles
const RESET = "\033[0m"
// Reset to default color
const RESET_COLOR = "\033[32m"
// Return curor to start of line and clean it
const RESET_LINE = "\r\033[K"
// List of possible colors
const (
BLACK = iota
RED
GREEN
YELLOW
BLUE
MAGENTA
CYAN
WHITE
)
var Output *bufio.Writer = bufio.NewWriter(os.Stdout)
func getColor(code int) string {
return fmt.Sprintf("\033[3%dm", code)
}
func getBgColor(code int) string {
return fmt.Sprintf("\033[4%dm", code)
}
// Set percent flag: num | PCT
//
// Check percent flag: num & PCT
//
// Reset percent flag: num & 0xFF
const shift = uint(^uint(0)>>63) << 4
const PCT = 0x8000 << shift
type winsize struct {
Row uint16
Col uint16
Xpixel uint16
Ypixel uint16
}
// Global screen buffer
// Its not recommented write to buffer dirrectly, use package Print,Printf,Println fucntions instead.
var Screen *bytes.Buffer = new(bytes.Buffer)
// Get relative or absolute coorditantes
// To get relative, set PCT flag to number:
//
// // Get 10% of total width to `x` and 20 to y
// x, y = tm.GetXY(10|tm.PCT, 20)
//
func GetXY(x int, y int) (int, int) {
if y == -1 {
y = CurrentHeight() + 1
}
if x&PCT != 0 {
x = int((x & 0xFF) * Width() / 100)
}
if y&PCT != 0 {
y = int((y & 0xFF) * Height() / 100)
}
return x, y
}
type sf func(int, string) string
// Apply given transformation func for each line in string
func applyTransform(str string, transform sf) (out string) {
out = ""
for idx, line := range strings.Split(str, "\n") {
out += transform(idx, line)
}
return
}
// Clear screen
func Clear() {
Output.WriteString("\033[2J")
}
// Move cursor to given position
func MoveCursor(x int, y int) {
fmt.Fprintf(Screen, "\033[%d;%dH", x, y)
}
// Move cursor up relative the current position
func MoveCursorUp(bias int) {
fmt.Fprintf(Screen, "\033[%dA", bias)
}
// Move cursor down relative the current position
func MoveCursorDown(bias int) {
fmt.Fprintf(Screen, "\033[%dB", bias)
}
// Move cursor forward relative the current position
func MoveCursorForward(bias int) {
fmt.Fprintf(Screen, "\033[%dC", bias)
}
// Move cursor backward relative the current position
func MoveCursorBackward(bias int) {
fmt.Fprintf(Screen, "\033[%dD", bias)
}
// Move string to possition
func MoveTo(str string, x int, y int) (out string) {
x, y = GetXY(x, y)
return applyTransform(str, func(idx int, line string) string {
return fmt.Sprintf("\033[%d;%dH%s", y+idx, x, line)
})
}
// Return carrier to start of line
func ResetLine(str string) (out string) {
return applyTransform(str, func(idx int, line string) string {
return fmt.Sprintf(RESET_LINE, line)
})
}
// Make bold
func Bold(str string) string {
return applyTransform(str, func(idx int, line string) string {
return fmt.Sprintf("\033[1m%s\033[0m", line)
})
}
// Apply given color to string:
//
// tm.Color("RED STRING", tm.RED)
//
func Color(str string, color int) string {
return applyTransform(str, func(idx int, line string) string {
return fmt.Sprintf("%s%s%s", getColor(color), line, RESET)
})
}
func Highlight(str, substr string, color int) string {
hiSubstr := Color(substr, color)
return strings.Replace(str, substr, hiSubstr, -1)
}
func HighlightRegion(str string, from, to, color int) string {
return str[:from] + Color(str[from:to], color) + str[to:]
}
// Change background color of string:
//
// tm.Background("string", tm.RED)
//
func Background(str string, color int) string {
return applyTransform(str, func(idx int, line string) string {
return fmt.Sprintf("%s%s%s", getBgColor(color), line, RESET)
})
}
// Get console width
func Width() int {
ws, err := getWinsize()
if err != nil {
return -1
}
return int(ws.Col)
}
// Get console height
func Height() int {
ws, err := getWinsize()
if err != nil {
return -1
}
return int(ws.Row)
}
// Get current height. Line count in Screen buffer.
func CurrentHeight() int {
return strings.Count(Screen.String(), "\n")
}
// Flush buffer and ensure that it will not overflow screen
func Flush() {
for idx, str := range strings.Split(Screen.String(), "\n") {
if idx > Height() {
return
}
Output.WriteString(str + "\n")
}
Output.Flush()
Screen.Reset()
}
func Print(a ...interface{}) (n int, err error) {
return fmt.Fprint(Screen, a...)
}
func Println(a ...interface{}) (n int, err error) {
return fmt.Fprintln(Screen, a...)
}
func Printf(format string, a ...interface{}) (n int, err error) {
return fmt.Fprintf(Screen, format, a...)
}
func Context(data string, idx, max int) string {
var start, end int
if len(data[:idx]) < (max / 2) {
start = 0
} else {
start = idx - max/2
}
if len(data)-idx < (max / 2) {
end = len(data) - 1
} else {
end = idx + max/2
}
return data[start:end]
}

12
vendor/github.com/buger/goterm/terminal_nosysioctl.go generated vendored Normal file
View file

@ -0,0 +1,12 @@
// +build windows plan9 solaris
package goterm
func getWinsize() (*winsize, error) {
ws := new(winsize)
ws.Col = 80
ws.Row = 24
return ws, nil
}

36
vendor/github.com/buger/goterm/terminal_sysioctl.go generated vendored Normal file
View file

@ -0,0 +1,36 @@
// +build !windows,!plan9,!solaris
package goterm
import (
"fmt"
"os"
"runtime"
"syscall"
"unsafe"
)
func getWinsize() (*winsize, error) {
ws := new(winsize)
var _TIOCGWINSZ int64
switch runtime.GOOS {
case "linux":
_TIOCGWINSZ = 0x5413
case "darwin":
_TIOCGWINSZ = 1074295912
}
r1, _, errno := syscall.Syscall(syscall.SYS_IOCTL,
uintptr(syscall.Stdin),
uintptr(_TIOCGWINSZ),
uintptr(unsafe.Pointer(ws)),
)
if int(r1) == -1 {
fmt.Println("Error:", os.NewSyscallError("GetWinsize", errno))
return nil, os.NewSyscallError("GetWinsize", errno)
}
return ws, nil
}

191
vendor/github.com/mrunalp/fileutils/LICENSE generated vendored Normal file
View file

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2014 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

5
vendor/github.com/mrunalp/fileutils/README.md generated vendored Normal file
View file

@ -0,0 +1,5 @@
# fileutils
Collection of utilities for file manipulation in golang
The library is based on docker pkg/archive pkg/idtools but does copies instead of handling archive formats.

161
vendor/github.com/mrunalp/fileutils/fileutils.go generated vendored Normal file
View file

@ -0,0 +1,161 @@
package fileutils
import (
"fmt"
"io"
"os"
"path/filepath"
"syscall"
)
// CopyFile copies the file at source to dest
func CopyFile(source string, dest string) error {
si, err := os.Lstat(source)
if err != nil {
return err
}
st, ok := si.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("could not convert to syscall.Stat_t")
}
uid := int(st.Uid)
gid := int(st.Gid)
// Handle symlinks
if si.Mode()&os.ModeSymlink != 0 {
target, err := os.Readlink(source)
if err != nil {
return err
}
if err := os.Symlink(target, dest); err != nil {
return err
}
}
// Handle device files
if st.Mode&syscall.S_IFMT == syscall.S_IFBLK || st.Mode&syscall.S_IFMT == syscall.S_IFCHR {
devMajor := int64(major(uint64(st.Rdev)))
devMinor := int64(minor(uint64(st.Rdev)))
mode := uint32(si.Mode() & 07777)
if st.Mode&syscall.S_IFMT == syscall.S_IFBLK {
mode |= syscall.S_IFBLK
}
if st.Mode&syscall.S_IFMT == syscall.S_IFCHR {
mode |= syscall.S_IFCHR
}
if err := syscall.Mknod(dest, mode, int(mkdev(devMajor, devMinor))); err != nil {
return err
}
}
// Handle regular files
if si.Mode().IsRegular() {
sf, err := os.Open(source)
if err != nil {
return err
}
defer sf.Close()
df, err := os.Create(dest)
if err != nil {
return err
}
defer df.Close()
_, err = io.Copy(df, sf)
if err != nil {
return err
}
}
// Chown the file
if err := os.Lchown(dest, uid, gid); err != nil {
return err
}
// Chmod the file
if !(si.Mode()&os.ModeSymlink == os.ModeSymlink) {
if err := os.Chmod(dest, si.Mode()); err != nil {
return err
}
}
return nil
}
// CopyDirectory copies the files under the source directory
// to dest directory. The dest directory is created if it
// does not exist.
func CopyDirectory(source string, dest string) error {
fi, err := os.Stat(source)
if err != nil {
return err
}
// Get owner.
st, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("could not convert to syscall.Stat_t")
}
// We have to pick an owner here anyway.
if err := MkdirAllNewAs(dest, fi.Mode(), int(st.Uid), int(st.Gid)); err != nil {
return err
}
return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// Get the relative path
relPath, err := filepath.Rel(source, path)
if err != nil {
return nil
}
if info.IsDir() {
// Skip the source directory.
if path != source {
// Get the owner.
st, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("could not convert to syscall.Stat_t")
}
uid := int(st.Uid)
gid := int(st.Gid)
if err := os.Mkdir(filepath.Join(dest, relPath), info.Mode()); err != nil {
return err
}
if err := os.Lchown(filepath.Join(dest, relPath), uid, gid); err != nil {
return err
}
}
return nil
}
// Copy the file.
if err := CopyFile(path, filepath.Join(dest, relPath)); err != nil {
return err
}
return nil
})
}
func major(device uint64) uint64 {
return (device >> 8) & 0xfff
}
func minor(device uint64) uint64 {
return (device & 0xff) | ((device >> 12) & 0xfff00)
}
func mkdev(major int64, minor int64) uint32 {
return uint32(((minor & 0xfff00) << 12) | ((major & 0xfff) << 8) | (minor & 0xff))
}

49
vendor/github.com/mrunalp/fileutils/idtools.go generated vendored Normal file
View file

@ -0,0 +1,49 @@
package fileutils
import (
"os"
"path/filepath"
)
// MkdirAllNewAs creates a directory (include any along the path) and then modifies
// ownership ONLY of newly created directories to the requested uid/gid. If the
// directories along the path exist, no change of ownership will be performed
func MkdirAllNewAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
// make an array containing the original path asked for, plus (for mkAll == true)
// all path components leading up to the complete path that don't exist before we MkdirAll
// so that we can chown all of them properly at the end. If chownExisting is false, we won't
// chown the full directory path if it exists
var paths []string
if _, err := os.Stat(path); err != nil && os.IsNotExist(err) {
paths = []string{path}
} else if err == nil {
// nothing to do; directory path fully exists already
return nil
}
// walk back to "/" looking for directories which do not exist
// and add them to the paths array for chown after creation
dirPath := path
for {
dirPath = filepath.Dir(dirPath)
if dirPath == "/" {
break
}
if _, err := os.Stat(dirPath); err != nil && os.IsNotExist(err) {
paths = append(paths, dirPath)
}
}
if err := os.MkdirAll(path, mode); err != nil && !os.IsExist(err) {
return err
}
// even if it existed, we will chown the requested path + any subpaths that
// didn't exist when we called MkdirAll
for _, pathComponent := range paths {
if err := os.Chown(pathComponent, ownerUID, ownerGID); err != nil {
return err
}
}
return nil
}

View file

@ -0,0 +1,114 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
var capabilityMap map[string]capability.Cap
func init() {
capabilityMap = make(map[string]capability.Cap)
last := capability.CAP_LAST_CAP
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
if last == capability.Cap(63) {
last = capability.CAP_BLOCK_SUSPEND
}
for _, cap := range capability.List() {
if cap > last {
continue
}
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
capabilityMap[capKey] = cap
}
}
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
bounding := []capability.Cap{}
for _, c := range capConfig.Bounding {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
bounding = append(bounding, v)
}
effective := []capability.Cap{}
for _, c := range capConfig.Effective {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
effective = append(effective, v)
}
inheritable := []capability.Cap{}
for _, c := range capConfig.Inheritable {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
inheritable = append(inheritable, v)
}
permitted := []capability.Cap{}
for _, c := range capConfig.Permitted {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
permitted = append(permitted, v)
}
ambient := []capability.Cap{}
for _, c := range capConfig.Ambient {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
ambient = append(ambient, v)
}
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
return &containerCapabilities{
bounding: bounding,
effective: effective,
inheritable: inheritable,
permitted: permitted,
ambient: ambient,
pid: pid,
}, nil
}
type containerCapabilities struct {
pid capability.Capabilities
bounding []capability.Cap
effective []capability.Cap
inheritable []capability.Cap
permitted []capability.Cap
ambient []capability.Cap
}
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
func (c *containerCapabilities) ApplyBoundingSet() error {
c.pid.Clear(capability.BOUNDS)
c.pid.Set(capability.BOUNDS, c.bounding...)
return c.pid.Apply(capability.BOUNDS)
}
// Apply sets all the capabilities for the current process in the config.
func (c *containerCapabilities) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
c.pid.Set(capability.BOUNDS, c.bounding...)
c.pid.Set(capability.PERMITTED, c.permitted...)
c.pid.Set(capability.INHERITABLE, c.inheritable...)
c.pid.Set(capability.EFFECTIVE, c.effective...)
c.pid.Set(capability.AMBIENT, c.ambient...)
return c.pid.Apply(allCapabilityTypes)
}

View file

@ -0,0 +1,128 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}
// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}
// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}
paths[name] = path
}
m.Paths = paths
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
}
func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}

View file

@ -0,0 +1,10 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
}

View file

@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// Currently, cgroups cannot effectively be used in rootless containers.
// The new cgroup namespace doesn't really help us either because it doesn't
// have nice interactions with the user namespace (we're working with upstream
// to fix this).
if err := rootlessCgroup(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func rootlessMappings(config *configs.Config) error {
rootuid, err := config.HostRootUID()
if err != nil {
return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
}
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
if rootuid != euid {
return fmt.Errorf("rootless containers cannot map container root to a different host user")
}
}
rootgid, err := config.HostRootGID()
if err != nil {
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
}
// Similar to the above test, we need to make sure that we aren't trying to
// map to a group ID that we don't have the right to be.
if rootgid != getegid() {
return fmt.Errorf("rootless containers cannot map container root to a different host group")
}
// We can only map one user and group inside a container (our own).
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one user")
}
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one group")
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
}
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
}
}
}
return nil
}

View file

@ -0,0 +1,195 @@
package validate
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
selinux "github.com/opencontainers/selinux/go-selinux"
)
type Validator interface {
Validate(*configs.Config) error
}
func New() Validator {
return &ConfigValidator{}
}
type ConfigValidator struct {
}
func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.rootfs(config); err != nil {
return err
}
if err := v.network(config); err != nil {
return err
}
if err := v.hostname(config); err != nil {
return err
}
if err := v.security(config); err != nil {
return err
}
if err := v.usernamespace(config); err != nil {
return err
}
if err := v.sysctl(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}
// rootfs validates if the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
}
return err
}
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
}
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
return err
}
if filepath.Clean(config.Rootfs) != cleaned {
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
}
return nil
}
func (v *ConfigValidator) network(config *configs.Config) error {
if !config.Namespaces.Contains(configs.NEWNET) {
if len(config.Networks) > 0 || len(config.Routes) > 0 {
return fmt.Errorf("unable to apply network settings without a private NET namespace")
}
}
return nil
}
func (v *ConfigValidator) hostname(config *configs.Config) error {
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
return fmt.Errorf("unable to set hostname without a private UTS namespace")
}
return nil
}
func (v *ConfigValidator) security(config *configs.Config) error {
// restrict sys without mount namespace
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.GetEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
if config.Namespaces.Contains(configs.NEWUSER) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
}
} else {
if config.UidMappings != nil || config.GidMappings != nil {
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
}
}
return nil
}
// sysctl validates that the specified sysctl keys are valid or not.
// /proc/sys isn't completely namespaced and depending on which namespaces
// are specified, a subset of sysctls are permitted.
func (v *ConfigValidator) sysctl(config *configs.Config) error {
validSysctlMap := map[string]bool{
"kernel.msgmax": true,
"kernel.msgmnb": true,
"kernel.msgmni": true,
"kernel.sem": true,
"kernel.shmall": true,
"kernel.shmmax": true,
"kernel.shmmni": true,
"kernel.shm_rmid_forced": true,
}
for s := range config.Sysctl {
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
if config.Namespaces.Contains(configs.NEWIPC) {
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
}
}
if strings.HasPrefix(s, "net.") {
if config.Namespaces.Contains(configs.NEWNET) {
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
if err := checkHostNs(s, path); err != nil {
return err
}
}
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
}
}
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
}
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {
return false, err
}
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
}
// checkHostNs checks whether network sysctl is used in host namespace.
func checkHostNs(sysctlConfig string, path string) error {
var currentProcessNetns = "/proc/self/ns/net"
// readlink on the current processes network namespace
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
if err != nil {
return fmt.Errorf("read soft link %q error", currentProcessNetns)
}
// First check if the provided path is a symbolic link
symLink, err := isSymbolicLink(path)
if err != nil {
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
}
if symLink == false {
// The provided namespace is not a symbolic link,
// it is not the host namespace.
return nil
}
// readlink on the path provided in the struct
destOfContainer, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("read soft link %q error", path)
}
if destOfContainer == destOfCurrentProcess {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
}
return nil
}

View file

@ -0,0 +1,17 @@
package libcontainer
import (
"io"
"os"
)
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriteCloser
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
File() *os.File
}

View file

@ -0,0 +1,13 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View file

@ -0,0 +1,152 @@
package libcontainer
import (
"fmt"
"os"
"unsafe"
"golang.org/x/sys/unix"
)
func ConsoleFromFile(f *os.File) Console {
return &linuxConsole{
master: f,
}
}
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) File() *os.File {
return c.master
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount() error {
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(unix.O_RDWR)
if err != nil {
return err
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := unix.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := unix.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}
// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func SaneTerminal(terminal *os.File) error {
termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS)
if err != nil {
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
}
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil {
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
}
return nil
}

View file

@ -0,0 +1,11 @@
package libcontainer
import (
"errors"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View file

@ -0,0 +1,30 @@
package libcontainer
// newConsole returns an initialized console that can be used within a container
func newConsole() (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows pseudo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View file

@ -0,0 +1,166 @@
// Package libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
package libcontainer
import (
"os"
"time"
"github.com/opencontainers/runc/libcontainer/configs"
)
// Status is the status of a container.
type Status int
const (
// Created is the status that denotes the container exists but has not been run yet.
Created Status = iota
// Running is the status that denotes the container exists and is running.
Running
// Pausing is the status that denotes the container exists, it is in the process of being paused.
Pausing
// Paused is the status that denotes the container exists, but all its processes are paused.
Paused
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
)
func (s Status) String() string {
switch s {
case Created:
return "created"
case Running:
return "running"
case Pausing:
return "pausing"
case Paused:
return "paused"
case Stopped:
return "stopped"
default:
return "unknown"
}
}
// BaseState represents the platform agnostic pieces relating to a
// running container's state
type BaseState struct {
// ID is the container ID.
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace.
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime uint64 `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
// Config is the container's configuration.
Config configs.Config `json:"config"`
}
// BaseContainer is a libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found. BaseContainer includes methods that are platform agnostic.
type BaseContainer interface {
// Returns the ID of the container
ID() string
// Returns the current status of the container.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// SystemError - System error.
State() (*State, error)
// Returns the current config of the container.
Config() configs.Config
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, error)
// Returns statistics for the container.
//
// errors:
// ContainerNotExists - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
// Set resources of container as configured
//
// We can use this to change resources when containers are running.
//
// errors:
// SystemError - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Start(process *Process) (err error)
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerNotExists - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container, if its in a valid state, after killing any
// remaining running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// Running containers must first be stopped using Signal(..).
// Paused containers must first be resumed using Resume(..).
//
// errors:
// ContainerNotStopped - Container is still running,
// ContainerPaused - Container is paused,
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// If all is specified the signal is sent to all processes in the container
// including the initial process.
//
// errors:
// SystemError - System error.
Signal(s os.Signal, all bool) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -0,0 +1,20 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View file

@ -0,0 +1,37 @@
package libcontainer
// cgroup restoring strategy provided by criu
type cgMode uint32
const (
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
)
type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
}
type VethPairName struct {
ContainerInterfaceName string
HostInterfaceName string
}
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // direcotry for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
}

View file

@ -0,0 +1,6 @@
package libcontainer
// TODO Windows: This can ultimately be entirely factored out as criu is
// a Unix concept not relevant on Windows.
type CriuOpts struct {
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,209 @@
syntax = "proto2";
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
optional int32 pid = 3;
optional int32 fd = 4;
}
message criu_veth_pair {
required string if_in = 1;
required string if_out = 2;
};
message ext_mount_map {
required string key = 1;
required string val = 2;
};
message join_namespace {
required string ns = 1;
required string ns_file = 2;
optional string extra_opt = 3;
}
message inherit_fd {
required string key = 1;
required int32 fd = 2;
};
message cgroup_root {
optional string ctrl = 1;
required string path = 2;
};
message unix_sk {
required uint32 inode = 1;
};
enum criu_cg_mode {
IGNORE = 0;
CG_NONE = 1;
PROPS = 2;
SOFT = 3;
FULL = 4;
STRICT = 5;
DEFAULT = 6;
};
message criu_opts {
required int32 images_dir_fd = 1;
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
optional bool leave_running = 3;
optional bool ext_unix_sk = 4;
optional bool tcp_established = 5;
optional bool evasive_devices = 6;
optional bool shell_job = 7;
optional bool file_locks = 8;
optional int32 log_level = 9 [default = 2];
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
optional criu_page_server_info ps = 11;
optional bool notify_scripts = 12;
optional string root = 13;
optional string parent_img = 14;
optional bool track_mem = 15;
optional bool auto_dedup = 16;
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
optional bool manage_cgroups = 24; /* backward compatibility */
repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
repeated inherit_fd inherit_fd = 27; /* swrk only */
optional bool auto_ext_mnt = 28;
optional bool ext_sharing = 29;
optional bool ext_masters = 30;
repeated string skip_mnt = 31;
repeated string enable_fs = 32;
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
optional criu_cg_mode manage_cgroups_mode = 34;
optional uint32 ghost_limit = 35 [default = 0x100000];
repeated string irmap_scan_paths = 36;
repeated string external = 37;
optional uint32 empty_ns = 38;
repeated join_namespace join_ns = 39;
optional string cgroup_props = 41;
optional string cgroup_props_file = 42;
repeated string cgroup_dump_controller = 43;
optional string freeze_cgroup = 44;
optional uint32 timeout = 45;
optional bool tcp_skip_in_flight = 46;
optional bool weak_sysctls = 47;
optional bool lazy_pages = 48;
optional int32 status_fd = 49;
optional bool orphan_pts_master = 50;
}
message criu_dump_resp {
optional bool restored = 1;
}
message criu_restore_resp {
required int32 pid = 1;
}
message criu_notify {
optional string script = 1;
optional int32 pid = 2;
}
enum criu_req_type {
EMPTY = 0;
DUMP = 1;
RESTORE = 2;
CHECK = 3;
PRE_DUMP = 4;
PAGE_SERVER = 5;
NOTIFY = 6;
CPUINFO_DUMP = 7;
CPUINFO_CHECK = 8;
FEATURE_CHECK = 9;
VERSION = 10;
}
/*
* List of features which can queried via
* CRIU_REQ_TYPE__FEATURE_CHECK
*/
message criu_features {
optional bool mem_track = 1;
optional bool lazy_pages = 2;
}
/*
* Request -- each type corresponds to must-be-there
* request arguments of respective type
*/
message criu_req {
required criu_req_type type = 1;
optional criu_opts opts = 2;
optional bool notify_success = 3;
/*
* When set service won't close the connection but
* will wait for more req-s to appear. Works not
* for all request types.
*/
optional bool keep_open = 4;
/*
* 'features' can be used to query which features
* are supported by the installed criu/kernel
* via RPC.
*/
optional criu_features features = 5;
}
/*
* Response -- it states whether the request was served
* and additional request-specific information
*/
message criu_resp {
required criu_req_type type = 1;
required bool success = 2;
optional criu_dump_resp dump = 3;
optional criu_restore_resp restore = 4;
optional criu_notify notify = 5;
optional criu_page_server_info ps = 6;
optional int32 cr_errno = 7;
optional criu_features features = 8;
optional string cr_errmsg = 9;
optional criu_version version = 10;
}
/* Answer for criu_req_type.VERSION requests */
message criu_version {
required int32 major = 1;
required int32 minor = 2;
optional string gitid = 3;
optional int32 sublevel = 4;
optional int32 extra = 5;
optional string name = 6;
}

View file

@ -0,0 +1,70 @@
package libcontainer
import "io"
// ErrorCode is the API error code type.
type ErrorCode int
// API error codes.
const (
// Factory errors
IdInUse ErrorCode = iota
InvalidIdFormat
// Container errors
ContainerNotExists
ContainerPaused
ContainerNotStopped
ContainerNotRunning
ContainerNotPaused
// Process errors
NoProcessOps
// Common errors
ConfigInvalid
ConsoleExists
SystemError
)
func (c ErrorCode) String() string {
switch c {
case IdInUse:
return "Id already in use"
case InvalidIdFormat:
return "Invalid format"
case ContainerPaused:
return "Container paused"
case ConfigInvalid:
return "Invalid configuration"
case SystemError:
return "System error"
case ContainerNotExists:
return "Container does not exist"
case ContainerNotStopped:
return "Container is not stopped"
case ContainerNotRunning:
return "Container is not running"
case ConsoleExists:
return "Console exists for process"
case ContainerNotPaused:
return "Container is not paused"
case NoProcessOps:
return "No process operations"
default:
return "Unknown error"
}
}
// Error is the API error type.
type Error interface {
error
// Returns an error if it failed to write the detail of the Error to w.
// The detail of the Error may include the error message and a
// representation of the stack trace.
Detail(w io.Writer) error
// Returns the error code for this error.
Code() ErrorCode
}

View file

@ -0,0 +1,44 @@
package libcontainer
import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type Factory interface {
// Creates a new container with the given id and starts the initial process inside it.
// id must be a string containing only letters, digits and underscores and must contain
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and filesystem) must have distinct ids.
//
// Returns the new container with a running process.
//
// errors:
// IdInUse - id is already in use by a container
// InvalidIdFormat - id has incorrect format
// ConfigInvalid - config is invalid
// Systemerror - System error
//
// On error, any partially created container parts are cleaned up (the operation is atomic).
Create(id string, config *configs.Config) (Container, error)
// Load takes an ID for an existing container and returns the container information
// from the state. This presents a read only view of the container.
//
// errors:
// Path does not exist
// System error
Load(id string) (Container, error)
// StartInitialization is an internal API to libcontainer used during the reexec of the
// container.
//
// Errors:
// Pipe connection error
// System error
StartInitialization() error
// Type returns info string about factory type (e.g. lxc, libcontainer...)
Type() string
}

View file

@ -0,0 +1,325 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"runtime/debug"
"strconv"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
const (
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
)
var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init binary path and arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) (err error) {
if len(args) > 0 {
// Resolve relative paths to ensure that its available
// after directory changes.
if args[0], err = filepath.Abs(args[0]); err != nil {
return newGenericError(err, ConfigInvalid)
}
}
l.InitArgs = args
return nil
}
}
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &systemd.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// Cgroupfs is an options func to configure a LinuxFactory to return
// containers that use the native cgroups filesystem implementation to
// create and manage cgroups.
func Cgroupfs(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &fs.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// RootlessCgroups is an options func to configure a LinuxFactory to
// return containers that use the "rootless" cgroup manager, which will
// fail to do any operations not possible to do with an unprivileged user.
// It should only be used in conjunction with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &rootless.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
if err != nil {
return err
}
if !mounted {
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// CriuPath returns an option func to configure a LinuxFactory with the
// provided criupath
func CriuPath(criupath string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.CriuPath = criupath
return nil
}
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
if root != "" {
if err := os.MkdirAll(root, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
}
l := &LinuxFactory{
Root: root,
InitArgs: []string{"/proc/self/exe", "init"},
Validator: validate.New(),
CriuPath: "criu",
}
Cgroupfs(l)
for _, opt := range options {
if err := opt(l); err != nil {
return nil, err
}
}
return l, nil
}
// LinuxFactory implements the default factory interface for linux based systems.
type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitArgs are arguments for calling the init responsibilities for spawning
// a container.
InitArgs []string
// CriuPath is the path to the criu binary used for checkpoint and restore of
// containers.
CriuPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError)
}
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
id: id,
root: containerRoot,
config: config,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
c.state = &stoppedState{c: c}
return c, nil
}
func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot, id)
if err != nil {
return nil, err
}
r := &nonChildProcess{
processPid: state.InitProcessPid,
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &loadedState{c: c}
if err := c.refreshState(); err != nil {
return nil, err
}
return c, nil
}
func (l *LinuxFactory) Type() string {
return "libcontainer"
}
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var (
pipefd, rootfd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
// Get the INITPIPE.
pipefd, err = strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
defer pipe.Close()
// Only init processes have STATEDIR.
rootfd = -1
if it == initStandard {
if rootfd, err = strconv.Atoi(envStateDir); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err)
}
}
if envConsole != "" {
console, err := strconv.Atoi(envConsole)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
}
consoleSocket = os.NewFile(uintptr(console), "console-socket")
defer consoleSocket.Close()
}
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
defer func() {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err := newContainerInit(it, pipe, consoleSocket, rootfd)
if err != nil {
return err
}
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
return i.Init()
}
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
defer f.Close()
var state *State
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, newGenericError(err, SystemError)
}
return state, nil
}
func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
return nil
}

View file

@ -0,0 +1,92 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
{{$i}}: {{$frame.Function}}
Package: {{$frame.Package}}
File: {{$frame.File}}@{{$frame.Line}}{{end}}
`))
func newGenericError(err error, c ErrorCode) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: c,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
func newSystemError(err error) Error {
return createSystemError(err, "")
}
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
return createSystemError(err, fmt.Sprintf(cause, v...))
}
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Cause: cause,
Stack: stacktrace.Capture(2),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
}
func (e *genericError) Code() ErrorCode {
return e.ECode
}
func (e *genericError) Detail(w io.Writer) error {
return errorTemplate.Execute(w, e)
}

View file

@ -0,0 +1,502 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"net"
"os"
"strings"
"syscall" // only for Errno
"unsafe"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
)
type pid struct {
Pid int `json:"pid"`
}
// network is an internal struct used to setup container networks.
type network struct {
configs.Network
// TempVethPeerName is a unique temporary veth peer name that was placed into
// the container's namespace.
TempVethPeerName string `json:"temp_veth_peer_name"`
}
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities *configs.Capabilities `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
Rootless bool `json:"rootless"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDirFD int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
}
if err := populateProcessEnvironment(config.Env); err != nil {
return nil, err
}
switch t {
case initSetns:
return &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
consoleSocket: consoleSocket,
parentPid: unix.Getppid(),
config: config,
stateDirFD: stateDirFD,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
}
// populateProcessEnvironment loads the provided environment variables into the
// current processes's environment.
func populateProcessEnvironment(env []string) error {
for _, pair := range env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment '%v'", pair)
}
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}
return nil
}
// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
func finalizeNamespace(config *initConfig) error {
// Ensure that all unwanted fds we may have accidentally
// inherited are marked close-on-exec so they stay out of the
// container
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
return err
}
capabilities := &configs.Capabilities{}
if config.Capabilities != nil {
capabilities = config.Capabilities
} else if config.Config.Capabilities != nil {
capabilities = config.Config.Capabilities
}
w, err := newContainerCapList(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.ApplyBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return err
}
if err := setupUser(config); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return err
}
if err := w.ApplyCaps(); err != nil {
return err
}
if config.Cwd != "" {
if err := unix.Chdir(config.Cwd); err != nil {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
}
}
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
defer socket.Close()
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but since the /dev/pts mount
// can have gid=X set (at the users' option). So touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
console, err := newConsole()
if err != nil {
return err
}
// After we return from here, we don't need the console anymore.
defer console.Close()
linuxConsole, ok := console.(*linuxConsole)
if !ok {
return fmt.Errorf("failed to cast console to *linuxConsole")
}
// Mount the console inside our rootfs.
if mount {
if err := linuxConsole.mount(); err != nil {
return err
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, linuxConsole.File()); err != nil {
return err
}
// Now, dup over all the things.
return linuxConsole.dupStdio()
}
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
// syncParentHooks sends to the given pipe a JSON payload which indicates that
// the parent should execute pre-start hooks. It then waits for the parent to
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
var addGroups []int
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
if config.Rootless {
if execUser.Uid != 0 {
return fmt.Errorf("cannot run as a non-root user in a rootless container")
}
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(config, execUser); err != nil {
return err
}
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := unix.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
}
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s unix.Stat_t
if err := unix.Fstat(int(fd), &s); err != nil {
return err
}
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
// Skip chown if s.Gid is actually an unmapped gid in the host. While
// this is a bit dodgy if it just so happens that the console _is_
// owned by overflow_gid, there's no way for us to disambiguate this as
// a userspace program.
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
continue
}
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
return err
}
}
return nil
}
// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.initialize(config); err != nil {
return err
}
}
return nil
}
func setupRoute(config *configs.Config) error {
for _, config := range config.Routes {
_, dst, err := net.ParseCIDR(config.Destination)
if err != nil {
return err
}
src := net.ParseIP(config.Source)
if src == nil {
return fmt.Errorf("Invalid source for route: %s", config.Source)
}
gw := net.ParseIP(config.Gateway)
if gw == nil {
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
}
l, err := netlink.LinkByName(config.InterfaceName)
if err != nil {
return err
}
route := &netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
Dst: dst,
Src: src,
Gw: gw,
LinkIndex: l.Attrs().Index,
}
if err := netlink.RouteAdd(route); err != nil {
return err
}
}
return nil
}
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
const _P_PID = 1
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}
// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, os.NewSyscallError("waitid", e)
}
return si.si_pid != 0, nil
}
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
func isNoChildren(err error) bool {
switch err := err.(type) {
case syscall.Errno:
if err == unix.ECHILD {
return true
}
case *os.SyscallError:
if err.Err == unix.ECHILD {
return true
}
}
return false
}
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
pids, err := m.GetAllPids()
if err != nil {
m.Freeze(configs.Thawed)
return err
}
for _, pid := range pids {
p, err := os.FindProcess(pid)
if err != nil {
logrus.Warn(err)
continue
}
procs = append(procs, p)
if err := p.Signal(s); err != nil {
logrus.Warn(err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !isNoChildren(err) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}
if _, err := p.Wait(); err != nil {
if !isNoChildren(err) {
logrus.Warn("wait: ", err)
}
}
}
return nil
}

View file

@ -0,0 +1,50 @@
// +build linux
package keys
import (
"fmt"
"strconv"
"strings"
"golang.org/x/sys/unix"
)
type KeySerial uint32
func JoinSessionKeyring(name string) (KeySerial, error) {
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
if err != nil {
return 0, fmt.Errorf("could not create session key: %v", err)
}
return KeySerial(sessKeyId), nil
}
// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
// anding the bits with the given mask (clearing permissions) and setting
// additional permission bits
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
if err != nil {
return err
}
res := strings.Split(string(dest), ";")
if len(res) < 5 {
return fmt.Errorf("Destination buffer for key description is too small")
}
// parse permissions
perm64, err := strconv.ParseUint(res[3], 16, 32)
if err != nil {
return err
}
perm := (uint32(perm64) & mask) | setbits
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
return err
}
return nil
}

View file

@ -0,0 +1,87 @@
// +build linux
package libcontainer
import (
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// list of known message types we want to send to bootstrap program
// The number is randomly chosen to not conflict with known netlink types
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
)
type Int32msg struct {
Type uint16
Value uint32
}
// Serialize serializes the message.
// Int32msg has the following representation
// | nlattr len | nlattr type |
// | uint32 value |
func (msg *Int32msg) Serialize() []byte {
buf := make([]byte, msg.Len())
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(msg.Len()))
native.PutUint16(buf[2:4], msg.Type)
native.PutUint32(buf[4:8], msg.Value)
return buf
}
func (msg *Int32msg) Len() int {
return unix.NLA_HDRLEN + 4
}
// Bytemsg has the following representation
// | nlattr len | nlattr type |
// | value | pad |
type Bytemsg struct {
Type uint16
Value []byte
}
func (msg *Bytemsg) Serialize() []byte {
l := msg.Len()
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(l))
native.PutUint16(buf[2:4], msg.Type)
copy(buf[4:], msg.Value)
return buf
}
func (msg *Bytemsg) Len() int {
return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
}
type Boolmsg struct {
Type uint16
Value bool
}
func (msg *Boolmsg) Serialize() []byte {
buf := make([]byte, msg.Len())
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(msg.Len()))
native.PutUint16(buf[2:4], msg.Type)
if msg.Value {
buf[4] = 1
} else {
buf[4] = 0
}
return buf
}
func (msg *Boolmsg) Len() int {
return unix.NLA_HDRLEN + 1
}

View file

@ -0,0 +1,259 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"net"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
var strategies = map[string]networkStrategy{
"veth": &veth{},
"loopback": &loopback{},
}
// networkStrategy represents a specific network configuration for
// a container's networking stack
type networkStrategy interface {
create(*network, int) error
initialize(*network) error
detach(*configs.Network) error
attach(*configs.Network) error
}
// getStrategy returns the specific network strategy for the
// provided type.
func getStrategy(tpe string) (networkStrategy, error) {
s, exists := strategies[tpe]
if !exists {
return nil, fmt.Errorf("unknown strategy type %q", tpe)
}
return s, nil
}
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
out := &NetworkInterface{Name: interfaceName}
// This can happen if the network runtime information is missing - possible if the
// container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
}
*(netStat.Out) = data
}
return out, nil
}
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
if err != nil {
return 0, err
}
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
}
// loopback is a network strategy that provides a basic loopback device
type loopback struct {
}
func (l *loopback) create(n *network, nspid int) error {
return nil
}
func (l *loopback) initialize(config *network) error {
return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}})
}
func (l *loopback) attach(n *configs.Network) (err error) {
return nil
}
func (l *loopback) detach(n *configs.Network) (err error) {
return nil
}
// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}
func (v *veth) detach(n *configs.Network) (err error) {
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
brl, err := netlink.LinkByName(n.Bridge)
if err != nil {
return err
}
br, ok := brl.(*netlink.Bridge)
if !ok {
return fmt.Errorf("Wrong device type %T", brl)
}
host, err := netlink.LinkByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.LinkSetMaster(host, br); err != nil {
return err
}
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
return err
}
if n.HairpinMode {
if err := netlink.LinkSetHairpin(host, true); err != nil {
return err
}
}
if err := netlink.LinkSetUp(host); err != nil {
return err
}
return nil
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: n.HostInterfaceName,
TxQLen: n.TxQueueLen,
},
PeerName: n.TempVethPeerName,
}
if err := netlink.LinkAdd(veth); err != nil {
return err
}
defer func() {
if err != nil {
netlink.LinkDel(veth)
}
}()
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := netlink.LinkByName(n.TempVethPeerName)
if err != nil {
return err
}
return netlink.LinkSetNsPid(child, nspid)
}
func (v *veth) generateTempPeerName() (string, error) {
return utils.GenerateRandomName("veth", 7)
}
func (v *veth) initialize(config *network) error {
peer := config.TempVethPeerName
if peer == "" {
return fmt.Errorf("peer is not specified")
}
child, err := netlink.LinkByName(peer)
if err != nil {
return err
}
if err := netlink.LinkSetDown(child); err != nil {
return err
}
if err := netlink.LinkSetName(child, config.Name); err != nil {
return err
}
// get the interface again after we changed the name as the index also changes.
if child, err = netlink.LinkByName(config.Name); err != nil {
return err
}
if config.MacAddress != "" {
mac, err := net.ParseMAC(config.MacAddress)
if err != nil {
return err
}
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
return err
}
}
ip, err := netlink.ParseAddr(config.Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip); err != nil {
return err
}
if config.IPv6Address != "" {
ip6, err := netlink.ParseAddr(config.IPv6Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip6); err != nil {
return err
}
}
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
return err
}
if err := netlink.LinkSetUp(child); err != nil {
return err
}
if config.Gateway != "" {
gw := net.ParseIP(config.Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
if config.IPv6Gateway != "" {
gw := net.ParseIP(config.IPv6Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
return nil
}

View file

@ -0,0 +1,90 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"golang.org/x/sys/unix"
)
const oomCgroupName = "memory"
type PressureLevel uint
const (
LowPressure PressureLevel = iota
MediumPressure
CriticalPressure
)
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
evFile, err := os.Open(filepath.Join(cgDir, evName))
if err != nil {
return nil, err
}
fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
evFile.Close()
return nil, err
}
eventfd := os.NewFile(uintptr(fd), "eventfd")
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
evFile.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
evFile.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}
// notifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("path %q missing", oomCgroupName)
}
return registerMemoryEvent(dir, "memory.oom_control", "")
}
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("path %q missing", oomCgroupName)
}
if level > CriticalPressure {
return nil, fmt.Errorf("invalid pressure level %d", level)
}
levelStr := []string{"low", "medium", "critical"}[level]
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
}

View file

@ -0,0 +1,106 @@
package libcontainer
import (
"fmt"
"io"
"math"
"os"
"github.com/opencontainers/runc/libcontainer/configs"
)
type processOperations interface {
wait() (*os.ProcessState, error)
signal(sig os.Signal) error
pid() int
}
// Process specifies the configuration and IO for a process inside
// a container.
type Process struct {
// The command to be run followed by any arguments.
Args []string
// Env specifies the environment variables for the process.
Env []string
// User will set the uid and gid of the executing process running inside the container
// local to the container's user and group configuration.
User string
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string
// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
// Stdin is a pointer to a reader which provides the standard input stream.
Stdin io.Reader
// Stdout is a pointer to a writer which receives the standard output stream.
Stdout io.Writer
// Stderr is a pointer to a writer which receives the standard error stream.
Stderr io.Writer
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *configs.Capabilities
// AppArmorProfile specifies the profile to apply to the process and is
// changed at the time the process is execed
AppArmorProfile string
// Label specifies the label to apply to the process. It is commonly used by selinux
Label string
// NoNewPrivileges controls whether processes can gain additional privileges.
NoNewPrivileges *bool
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []configs.Rlimit
// ConsoleSocket provides the masterfd console.
ConsoleSocket *os.File
ops processOperations
}
// Wait waits for the process to exit.
// Wait releases any resources associated with the Process
func (p Process) Wait() (*os.ProcessState, error) {
if p.ops == nil {
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.wait()
}
// Pid returns the process ID
func (p Process) Pid() (int, error) {
// math.MinInt32 is returned here, because it's invalid value
// for the kill() system call.
if p.ops == nil {
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.pid(), nil
}
// Signal sends a signal to the Process.
func (p Process) Signal(sig os.Signal) error {
if p.ops == nil {
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
}
return p.ops.signal(sig)
}
// IO holds the process's STDIO
type IO struct {
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
}

View file

@ -0,0 +1,493 @@
// +build linux
package libcontainer
import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall" // only for Signal
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
// start starts the process execution.
start() error
// send a SIGKILL to the process and wait for the exit.
terminate() error
// wait waits on the process returning the process state.
wait() (*os.ProcessState, error)
// startTime returns the process start time.
startTime() (uint64, error)
signal(os.Signal) error
externalDescriptors() []string
setExternalDescriptors(fds []string)
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
cgroupPaths map[string]string
config *initConfig
fds []string
process *Process
bootstrapData io.Reader
}
func (p *setnsProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *setnsProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return unix.Kill(p.pid(), s)
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
}
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
}
// We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for process")
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemErrorWithCause(err, "writing config to pipe")
}
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// This shouldn't happen.
panic("unexpected procReady in setns")
case procHooks:
// This shouldn't happen.
panic("unexpected procHooks in setns")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
})
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
}
return nil
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "waiting on setns process to finish")
}
if !status.Success() {
p.cmd.Wait()
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
// avoid the process becoming a zombie.
func (p *setnsProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *setnsProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
// Return actual ProcessState even on Wait error
return p.cmd.ProcessState, err
}
func (p *setnsProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *setnsProcess) externalDescriptors() []string {
return p.fds
}
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
container *linuxContainer
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
rootDir *os.File
}
func (p *initProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *initProcess) externalDescriptors() []string {
return p.fds
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
// This is called by initProcess.start function
func (p *initProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return err
}
if !status.Success() {
p.cmd.Wait()
return &exec.ExitError{ProcessState: status}
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return err
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces")
}
if err := p.sendConfig(); err != nil {
return newSystemErrorWithCause(err, "sending config to init process")
}
var (
sentRun bool
sentResume bool
)
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for ready process")
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
case procHooks:
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
return nil
})
if !sentRun {
return newSystemErrorWithCause(ierr, "container init")
}
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
}
return nil
}
func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
if err != nil {
return p.cmd.ProcessState, err
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.sharePidns {
signalAllProcesses(p.manager, unix.SIGKILL)
}
return p.cmd.ProcessState, nil
}
func (p *initProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *initProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *initProcess) sendConfig() error {
// send the config to the container's init process, we don't use JSON Encode
// here because there might be a problem in JSON decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
return utils.WriteJSON(p.parentPipe, p.config)
}
func (p *initProcess) createNetworkInterfaces() error {
for _, config := range p.config.Config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
n := &network{
Network: *config,
}
if err := strategy.create(n, p.pid()); err != nil {
return err
}
p.config.Networks = append(p.config.Networks, n)
}
return nil
}
func (p *initProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return unix.Kill(p.pid(), s)
}
func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func getPipeFds(pid int) ([]string, error) {
fds := make([]string, 3)
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
// XXX: This breaks if the path is not a valid symlink (which can
// happen in certain particularly unlucky mount namespace setups).
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
// Ignore permission errors, for rootless containers and other
// non-dumpable processes. if we can't get the fd for a particular
// file, there's not much we can do.
if os.IsPermission(err) {
continue
}
return fds, err
}
fds[i] = target
}
return fds, nil
}
// InitializeIO creates pipes for use with the process's stdio and returns the
// opposite side for each. Do not use this if you want to have a pseudoterminal
// set up for you by libcontainer (TODO: fix that too).
// TODO: This is mostly unnecessary, and should be handled by clients.
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
// cleanup in case of an error
defer func() {
if err != nil {
for _, fd := range fds {
unix.Close(int(fd))
}
}
}()
// STDIN
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stdin, i.Stdin = r, w
// STDOUT
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stdout, i.Stdout = w, r
// STDERR
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, r.Fd(), w.Fd())
p.Stderr, i.Stderr = w, r
// change ownership of the pipes incase we are in a user namespace
for _, fd := range fds {
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
return nil, err
}
}
return i, nil
}

View file

@ -0,0 +1,122 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/opencontainers/runc/libcontainer/system"
)
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
var (
err error
)
proc, err := os.FindProcess(pid)
if err != nil {
return nil, err
}
stat, err := system.Stat(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: stat.StartTime,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime uint64
fds []string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.proc.Pid
}
func (p *restoredProcess) terminate() error {
err := p.proc.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
// TODO: how do we wait on the actual process?
// maybe use --exec-cmd in criu
st, err := p.proc.Wait()
if err != nil {
return nil, err
}
return st, nil
}
func (p *restoredProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}
func (p *restoredProcess) signal(s os.Signal) error {
return p.proc.Signal(s)
}
func (p *restoredProcess) externalDescriptors() []string {
return p.fds
}
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime uint64
fds []string
}
func (p *nonChildProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *nonChildProcess) pid() int {
return p.processPid
}
func (p *nonChildProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}
func (p *nonChildProcess) signal(s os.Signal) error {
proc, err := os.FindProcess(p.processPid)
if err != nil {
return err
}
return proc.Signal(s)
}
func (p *nonChildProcess) externalDescriptors() []string {
return p.fds
}
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}

View file

@ -0,0 +1,812 @@
// +build linux
package libcontainer
import (
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/symlink"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
return false
}
}
return true
}
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro. You must call
// finalizeRootfs after this function to finish setting up the rootfs.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs")
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemErrorWithCause(err, "running postmount command")
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
// manipulations.
if err := syncParentHooks(pipe); err != nil {
return err
}
// The reason these operations are done here rather than in finalizeRootfs
// is because the console-handling code gets quite sticky if we have to set
// up the console before doing the pivot_root(2). This is because the
// Console API has to also work with the ExecIn case, which means that the
// API must be able to deal with being inside as well as outside the
// container. It's just cleaner to do this here (at the expense of the
// operation not being perfectly split).
if err := unix.Chdir(config.Rootfs); err != nil {
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs)
}
if err != nil {
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
return nil
}
// finalizeRootfs sets anything to ro if necessary. You must call
// prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
// remount dev as ro if specified
for _, m := range config.Mounts {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
if err := remountReadonly(m); err != nil {
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
}
}
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
unix.Umask(0022)
return nil
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
switch m.Device {
case "proc", "sysfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
// Selinux kernels do not support labeling of /proc or /sys
return mountPropagate(m, rootfs, "")
case "mqueue":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
// older kernels do not support labeling of /dev/mqueue
if err := mountPropagate(m, rootfs, ""); err != nil {
return err
}
return label.SetFileLabel(dest, mountLabel)
}
return nil
case "tmpfs":
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
tmpDir := ""
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
if copyUp {
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
if err != nil {
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
}
defer os.RemoveAll(tmpDir)
m.Destination = tmpDir
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if copyUp {
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
}
}
return nil
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
// bind mount won't change mount options, we need remount to make mount options effective.
// first check that we have non-default options required before attempting a remount
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
// only remount if unique mount options are set
if err := remount(m, rootfs); err != nil {
return err
}
}
if m.Relabel != "" {
if err := label.Validate(m.Relabel); err != nil {
return err
}
shared := label.IsShared(m.Relabel)
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
return err
}
}
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
return err
}
var merged []string
for _, b := range binds {
ss := filepath.Base(b.Destination)
if strings.Contains(ss, ",") {
merged = append(merged, ss)
}
}
tmpfs := &configs.Mount{
Source: "tmpfs",
Device: "tmpfs",
Destination: m.Destination,
Flags: defaultMountFlags,
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
}
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
return err
}
}
}
if m.Flags&unix.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
}
}
default:
// ensure that the destination of the mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
var err error
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
}
return nil
}
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
mounts, err := cgroups.GetCgroupMounts(false)
if err != nil {
return nil, err
}
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
}
relDir, err := filepath.Rel(mm.Root, dir)
if err != nil {
return nil, err
}
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
Flags: unix.MS_BIND | unix.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
}
return binds, nil
}
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
invalidDestinations := []string{
"/proc",
}
// White list, it should be sub directories of invalid destinations
validDestinations := []string{
// These entries can be bind mounted by files emulated by fuse,
// so commands like top, free displays stats in container.
"/proc/cpuinfo",
"/proc/diskstats",
"/proc/meminfo",
"/proc/stat",
"/proc/swaps",
"/proc/uptime",
"/proc/net/dev",
}
for _, valid := range validDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
if err != nil {
return err
}
if path == "." {
return nil
}
}
for _, invalid := range invalidDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
if err != nil {
return err
}
if path == "." || !strings.HasPrefix(path, "..") {
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
}
}
return nil
}
func setupDevSymlinks(rootfs string) error {
var links = [][2]string{
{"/proc/self/fd", "/dev/fd"},
{"/proc/self/fd/0", "/dev/stdin"},
{"/proc/self/fd/1", "/dev/stdout"},
{"/proc/self/fd/2", "/dev/stderr"},
}
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
// in /dev if it exists in /proc.
if _, err := os.Stat("/proc/kcore"); err == nil {
links = append(links, [2]string{"/proc/kcore", "/dev/core"})
}
for _, link := range links {
var (
src = link[0]
dst = filepath.Join(rootfs, link[1])
)
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
return fmt.Errorf("symlink %s %s %s", src, dst, err)
}
}
return nil
}
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
// this method will make them point to `/dev/null` in this container's rootfs. This
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull() error {
var stat, devNullStat unix.Stat_t
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
return fmt.Errorf("Failed to open /dev/null - %s", err)
}
defer file.Close()
if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
return err
}
for fd := 0; fd < 3; fd++ {
if err := unix.Fstat(fd, &stat); err != nil {
return err
}
if stat.Rdev == devNullStat.Rdev {
// Close and re-open the fd.
if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
return err
}
}
}
return nil
}
// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
oldMask := unix.Umask(0000)
for _, node := range config.Devices {
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
unix.Umask(oldMask)
return err
}
}
unix.Umask(oldMask)
return nil
}
func bindMountDeviceNode(dest string, node *configs.Device) error {
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
}
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
return bindMountDeviceNode(dest, node)
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
} else if os.IsPermission(err) {
return bindMountDeviceNode(dest, node)
}
return err
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c', 'u':
fileMode |= unix.S_IFCHR
case 'b':
fileMode |= unix.S_IFBLK
case 'p':
fileMode |= unix.S_IFIFO
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
return unix.Chown(dest, int(node.Uid), int(node.Gid))
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
for _, m := range mountinfo {
if m.Mountpoint == dir {
return m
}
}
return nil
}
// Get the parent mount point of directory passed in as argument. Also return
// optional fields.
func getParentMount(rootfs string) (string, string, error) {
var path string
mountinfos, err := mount.GetMounts()
if err != nil {
return "", "", err
}
mountinfo := getMountInfo(mountinfos, rootfs)
if mountinfo != nil {
return rootfs, mountinfo.Optional, nil
}
path = rootfs
for {
path = filepath.Dir(path)
mountinfo = getMountInfo(mountinfos, path)
if mountinfo != nil {
return path, mountinfo.Optional, nil
}
if path == "/" {
break
}
}
// If we are here, we did not find parent mount. Something is wrong.
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(rootfs string) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(rootfs)
if err != nil {
return err
}
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
}
}
// Make parent mount PRIVATE if it was shared. It is needed for two
// reasons. First of all pivot_root() will fail if parent mount is
// shared. Secondly when we bind mount rootfs it will propagate to
// parent namespace and we don't want that to happen.
if sharedMount {
return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
}
return nil
}
func prepareRoot(config *configs.Config) error {
flag := unix.MS_SLAVE | unix.MS_REC
if config.RootPropagation != 0 {
flag = config.RootPropagation
}
if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
// Make parent mount private to make sure following bind mount does
// not propagate in other namespaces. Also it will help with kernel
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
}
func setReadonly() error {
return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
}
func setupPtmx(config *configs.Config) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
return nil
}
// pivotRoot will call pivot_root such that rootfs becomes the new root
// filesystem, and everything else is cleaned up.
func pivotRoot(rootfs string) error {
// While the documentation may claim otherwise, pivot_root(".", ".") is
// actually valid. What this results in is / being the new root but
// /proc/self/cwd being the old root. Since we can play around with the cwd
// with pivot_root this allows us to pivot without creating directories in
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer unix.Close(oldroot)
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer unix.Close(newroot)
// Change to the new root so that the pivot_root actually acts on it.
if err := unix.Fchdir(newroot); err != nil {
return err
}
if err := unix.PivotRoot(".", "."); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
if err := unix.Fchdir(oldroot); err != nil {
return err
}
// Make oldroot rprivate to make sure our unmounts don't propagate to the
// host (and thus bork the machine).
if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
return err
}
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
return err
}
// Switch back to our shiny new root.
if err := unix.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
return nil
}
func msMoveRoot(rootfs string) error {
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
return err
}
if err := unix.Chroot("."); err != nil {
return err
}
return unix.Chdir("/")
}
// createIfNotExists creates a file or a directory only if it does not already exist.
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0755)
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
return nil
}
// readonlyPath will make a path read only.
func readonlyPath(path string) error {
if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
}
// remountReadonly will remount an existing mount point and ensure that it is read-only.
func remountReadonly(m *configs.Mount) error {
var (
dest = m.Destination
flags = m.Flags
)
for i := 0; i < 5; i++ {
if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil {
switch err {
case unix.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
return err
}
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
}
// maskPath masks the top of the specified path inside a container to avoid
// security issues from processes reading information from non-namespace aware
// mounts ( proc/kcore ).
// For files, maskPath bind mounts /dev/null over the top of the specified path.
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
func maskPath(path string) error {
if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
if err == unix.ENOTDIR {
return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "")
}
return err
}
return nil
}
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
keyPath := strings.Replace(key, ".", "/", -1)
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
}
func remount(m *configs.Mount, rootfs string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
return err
}
return nil
}
// Do the mount operation followed by additional mounts required to take care
// of propagation flags.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
var (
dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
if libcontainerUtils.CleanPath(dest) == "/dev" {
flags &= ^unix.MS_RDONLY
}
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
dest = filepath.Join(rootfs, dest)
}
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
return err
}
for _, pflag := range m.PropagationFlags {
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
return err
}
}
return nil
}

View file

@ -0,0 +1,76 @@
package seccomp
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/configs"
)
var operators = map[string]configs.Operator{
"SCMP_CMP_NE": configs.NotEqualTo,
"SCMP_CMP_LT": configs.LessThan,
"SCMP_CMP_LE": configs.LessThanOrEqualTo,
"SCMP_CMP_EQ": configs.EqualTo,
"SCMP_CMP_GE": configs.GreaterThanOrEqualTo,
"SCMP_CMP_GT": configs.GreaterThan,
"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
}
var actions = map[string]configs.Action{
"SCMP_ACT_KILL": configs.Kill,
"SCMP_ACT_ERRNO": configs.Errno,
"SCMP_ACT_TRAP": configs.Trap,
"SCMP_ACT_ALLOW": configs.Allow,
"SCMP_ACT_TRACE": configs.Trace,
}
var archs = map[string]string{
"SCMP_ARCH_X86": "x86",
"SCMP_ARCH_X86_64": "amd64",
"SCMP_ARCH_X32": "x32",
"SCMP_ARCH_ARM": "arm",
"SCMP_ARCH_AARCH64": "arm64",
"SCMP_ARCH_MIPS": "mips",
"SCMP_ARCH_MIPS64": "mips64",
"SCMP_ARCH_MIPS64N32": "mips64n32",
"SCMP_ARCH_MIPSEL": "mipsel",
"SCMP_ARCH_MIPSEL64": "mipsel64",
"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
"SCMP_ARCH_PPC": "ppc",
"SCMP_ARCH_PPC64": "ppc64",
"SCMP_ARCH_PPC64LE": "ppc64le",
"SCMP_ARCH_S390": "s390",
"SCMP_ARCH_S390X": "s390x",
}
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
// Comparison operators use the names they are assigned by Libseccomp's header.
// Attempting to convert a string that is not a valid operator results in an
// error.
func ConvertStringToOperator(in string) (configs.Operator, error) {
if op, ok := operators[in]; ok == true {
return op, nil
}
return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
}
// ConvertStringToAction converts a string into a Seccomp rule match action.
// Actions use the names they are assigned in Libseccomp's header, though some
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
// return errors.
// Attempting to convert a string that is not a valid action results in an
// error.
func ConvertStringToAction(in string) (configs.Action, error) {
if act, ok := actions[in]; ok == true {
return act, nil
}
return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
}
// ConvertStringToArch converts a string into a Seccomp comparison arch.
func ConvertStringToArch(in string) (string, error) {
if arch, ok := archs[in]; ok == true {
return arch, nil
}
return "", fmt.Errorf("string %s is not a valid arch for seccomp", in)
}

View file

@ -0,0 +1,227 @@
// +build linux,cgo,seccomp
package seccomp
import (
"bufio"
"fmt"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
"golang.org/x/sys/unix"
)
var (
actAllow = libseccomp.ActAllow
actTrap = libseccomp.ActTrap
actKill = libseccomp.ActKill
actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
)
// Filters given syscalls in a container, preventing them from being used
// Started in the container init process, and carried over to all child processes
// Setns calls, however, require a separate invocation, as they are not children
// of the init until they join the namespace
func InitSeccomp(config *configs.Seccomp) error {
if config == nil {
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
}
defaultAction, err := getAction(config.DefaultAction)
if err != nil {
return fmt.Errorf("error initializing seccomp - invalid default action")
}
filter, err := libseccomp.NewFilter(defaultAction)
if err != nil {
return fmt.Errorf("error creating filter: %s", err)
}
// Add extra architectures
for _, arch := range config.Architectures {
scmpArch, err := libseccomp.GetArchFromString(arch)
if err != nil {
return err
}
if err := filter.AddArch(scmpArch); err != nil {
return err
}
}
// Unset no new privs bit
if err := filter.SetNoNewPrivsBit(false); err != nil {
return fmt.Errorf("error setting no new privileges: %s", err)
}
// Add a rule for each syscall
for _, call := range config.Syscalls {
if call == nil {
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
}
if err = matchCall(filter, call); err != nil {
return err
}
}
if err = filter.Load(); err != nil {
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
}
return nil
}
// IsEnabled returns if the kernel has been configured to support seccomp.
func IsEnabled() bool {
// Try to read from /proc/self/status for kernels > 3.8
s, err := parseStatusFile("/proc/self/status")
if err != nil {
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
return true
}
}
return false
}
_, ok := s["Seccomp"]
return ok
}
// Convert Libcontainer Action to Libseccomp ScmpAction
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
switch act {
case configs.Kill:
return actKill, nil
case configs.Errno:
return actErrno, nil
case configs.Trap:
return actTrap, nil
case configs.Allow:
return actAllow, nil
case configs.Trace:
return actTrace, nil
default:
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
}
}
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
switch op {
case configs.EqualTo:
return libseccomp.CompareEqual, nil
case configs.NotEqualTo:
return libseccomp.CompareNotEqual, nil
case configs.GreaterThan:
return libseccomp.CompareGreater, nil
case configs.GreaterThanOrEqualTo:
return libseccomp.CompareGreaterEqual, nil
case configs.LessThan:
return libseccomp.CompareLess, nil
case configs.LessThanOrEqualTo:
return libseccomp.CompareLessOrEqual, nil
case configs.MaskEqualTo:
return libseccomp.CompareMaskedEqual, nil
default:
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
}
}
// Convert Libcontainer Arg to Libseccomp ScmpCondition
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
cond := libseccomp.ScmpCondition{}
if arg == nil {
return cond, fmt.Errorf("cannot convert nil to syscall condition")
}
op, err := getOperator(arg.Op)
if err != nil {
return cond, err
}
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
}
// Add a rule to match a single syscall
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
if call == nil || filter == nil {
return fmt.Errorf("cannot use nil as syscall to block")
}
if len(call.Name) == 0 {
return fmt.Errorf("empty string is not a valid syscall")
}
// If we can't resolve the syscall, assume it's not supported on this kernel
// Ignore it, don't error out
callNum, err := libseccomp.GetSyscallFromName(call.Name)
if err != nil {
return nil
}
// Convert the call's action to the libseccomp equivalent
callAct, err := getAction(call.Action)
if err != nil {
return err
}
// Unconditional match - just add the rule
if len(call.Args) == 0 {
if err = filter.AddRule(callNum, callAct); err != nil {
return err
}
} else {
// Conditional match - convert the per-arg rules into library format
conditions := []libseccomp.ScmpCondition{}
for _, cond := range call.Args {
newCond, err := getCondition(cond)
if err != nil {
return err
}
conditions = append(conditions, newCond)
}
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
return err
}
}
return nil
}
func parseStatusFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
s := bufio.NewScanner(f)
status := make(map[string]string)
for s.Scan() {
text := s.Text()
parts := strings.Split(text, ":")
if len(parts) <= 1 {
continue
}
status[parts[0]] = parts[1]
}
if err := s.Err(); err != nil {
return nil, err
}
return status, nil
}

View file

@ -0,0 +1,24 @@
// +build !linux !cgo !seccomp
package seccomp
import (
"errors"
"github.com/opencontainers/runc/libcontainer/configs"
)
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
// InitSeccomp does nothing because seccomp is not supported.
func InitSeccomp(config *configs.Seccomp) error {
if config != nil {
return ErrSeccompNotEnabled
}
return nil
}
// IsEnabled returns false, because it is not supported.
func IsEnabled() bool {
return false
}

View file

@ -0,0 +1,11 @@
// +build linux,go1.5
package libcontainer
import "syscall"
// Set the GidMappingsEnableSetgroups member to true, so the process's
// setgroups proc entry wont be set to 'deny' if GidMappings are set
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = true
}

View file

@ -0,0 +1,65 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
pipe *os.File
consoleSocket *os.File
config *initConfig
}
func (l *linuxSetnsInit) getSessionRingName() string {
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
}
func (l *linuxSetnsInit) Init() error {
if !l.config.Config.NoNewKeyring {
// do not inherit the parent's session keyring
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
}
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View file

@ -0,0 +1,27 @@
package stacktrace
import "runtime"
// Capture captures a stacktrace for the current calling go program
//
// skip is the number of frames to skip
func Capture(userSkip int) Stacktrace {
var (
skip = userSkip + 1 // add one for our own function
frames []Frame
prevPc uintptr
)
for i := skip; ; i++ {
pc, file, line, ok := runtime.Caller(i)
//detect if caller is repeated to avoid loop, gccgo
//currently runs into a loop without this check
if !ok || pc == prevPc {
break
}
frames = append(frames, NewFrame(pc, file, line))
prevPc = pc
}
return Stacktrace{
Frames: frames,
}
}

View file

@ -0,0 +1,38 @@
package stacktrace
import (
"path/filepath"
"runtime"
"strings"
)
// NewFrame returns a new stack frame for the provided information
func NewFrame(pc uintptr, file string, line int) Frame {
fn := runtime.FuncForPC(pc)
if fn == nil {
return Frame{}
}
pack, name := parseFunctionName(fn.Name())
return Frame{
Line: line,
File: filepath.Base(file),
Package: pack,
Function: name,
}
}
func parseFunctionName(name string) (string, string) {
i := strings.LastIndex(name, ".")
if i == -1 {
return "", name
}
return name[:i], name[i+1:]
}
// Frame contains all the information for a stack frame within a go program
type Frame struct {
File string
Function string
Package string
Line int
}

View file

@ -0,0 +1,5 @@
package stacktrace
type Stacktrace struct {
Frames []Frame
}

View file

@ -0,0 +1,188 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"os/exec"
"syscall" //only for Exec
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
type linuxStandardInit struct {
pipe *os.File
consoleSocket *os.File
parentPid int
stateDirFD int
config *initConfig
}
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
var newperms uint32
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
// with user ns we need 'other' search permissions
newperms = 0x8
} else {
// without user ns we need 'UID' search permissions
newperms = 0x80000
}
// create a unique per session container name that we can
// join in setns; however, other containers can also join it
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
}
func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring
sessKeyId, err := keys.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
if err := setupRoute(l.config.Config); err != nil {
return err
}
label.Init()
// prepareRootfs() can be executed only for a new mount namespace.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
return err
}
}
// Set up the console. This has to be done *before* we finalize the rootfs,
// but *after* we've given the user the chance to set up all of the mounts
// they wanted.
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := unix.Sethostname([]byte(hostname)); err != nil {
return err
}
}
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
for key, value := range l.config.Config.Sysctl {
if err := writeSystemProperty(key, value); err != nil {
return err
}
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := readonlyPath(path); err != nil {
return err
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskPath(path); err != nil {
return err
}
}
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
}
if l.config.NoNewPrivileges {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
if err := syncParentReady(l.pipe); err != nil {
return err
}
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
}
// compare the parent from the initial start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparented to something else so we should
// just kill ourself and not cause problems for someone else.
if unix.Getppid() != l.parentPid {
return unix.Kill(unix.Getpid(), unix.SIGKILL)
}
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// close the pipe to signal that we have completed our init.
l.pipe.Close()
// wait for the fifo to be opened on the other side before
// exec'ing the users process.
fd, err := unix.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
}
if _, err := unix.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.stateDirFD)
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
return nil
}

View file

@ -0,0 +1,248 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func newStateTransitionError(from, to containerState) error {
return &stateTransitionError{
From: from.status().String(),
To: to.status().String(),
}
}
// stateTransitionError is returned when an invalid state transition happens from one
// state to another.
type stateTransitionError struct {
From string
To string
}
func (s *stateTransitionError) Error() string {
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
}
type containerState interface {
transition(containerState) error
destroy() error
status() Status
}
func destroy(c *linuxContainer) error {
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
logrus.Warn(err)
}
}
err := c.cgroupManager.Destroy()
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}
c.initProcess = nil
if herr := runPoststopHooks(c); err == nil {
err = herr
}
c.state = &stoppedState{c: c}
return err
}
func runPoststopHooks(c *linuxContainer) error {
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Bundle: utils.SearchLabels(c.config.Labels, "bundle"),
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {
return err
}
}
}
return nil
}
// stoppedState represents a container is a stopped/destroyed state.
type stoppedState struct {
c *linuxContainer
}
func (b *stoppedState) status() Status {
return Stopped
}
func (b *stoppedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *restoredState:
b.c.state = s
return nil
case *stoppedState:
return nil
}
return newStateTransitionError(b, s)
}
func (b *stoppedState) destroy() error {
return destroy(b.c)
}
// runningState represents a container that is currently running.
type runningState struct {
c *linuxContainer
}
func (r *runningState) status() Status {
return Running
}
func (r *runningState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
t, err := r.c.runType()
if err != nil {
return err
}
if t == Running {
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
}
r.c.state = s
return nil
case *pausedState:
r.c.state = s
return nil
case *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *runningState) destroy() error {
t, err := r.c.runType()
if err != nil {
return err
}
if t == Running {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
return destroy(r.c)
}
type createdState struct {
c *linuxContainer
}
func (i *createdState) status() Status {
return Created
}
func (i *createdState) transition(s containerState) error {
switch s.(type) {
case *runningState, *pausedState, *stoppedState:
i.c.state = s
return nil
case *createdState:
return nil
}
return newStateTransitionError(i, s)
}
func (i *createdState) destroy() error {
i.c.initProcess.signal(unix.SIGKILL)
return destroy(i.c)
}
// pausedState represents a container that is currently pause. It cannot be destroyed in a
// paused state and must transition back to running first.
type pausedState struct {
c *linuxContainer
}
func (p *pausedState) status() Status {
return Paused
}
func (p *pausedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *stoppedState:
p.c.state = s
return nil
case *pausedState:
return nil
}
return newStateTransitionError(p, s)
}
func (p *pausedState) destroy() error {
t, err := p.c.runType()
if err != nil {
return err
}
if t != Running && t != Created {
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
return err
}
return destroy(p.c)
}
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
}
// restoredState is the same as the running state but also has associated checkpoint
// information that maybe need destroyed when the container is stopped and destroy is called.
type restoredState struct {
imageDir string
c *linuxContainer
}
func (r *restoredState) status() Status {
return Running
}
func (r *restoredState) transition(s containerState) error {
switch s.(type) {
case *stoppedState, *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *restoredState) destroy() error {
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
if !os.IsNotExist(err) {
return err
}
}
return destroy(r.c)
}
// loadedState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type loadedState struct {
c *linuxContainer
s Status
}
func (n *loadedState) status() Status {
return n.s
}
func (n *loadedState) transition(s containerState) error {
n.c.state = s
return nil
}
func (n *loadedState) destroy() error {
if err := n.c.refreshState(); err != nil {
return err
}
return n.c.state.destroy()
}

View file

@ -0,0 +1,15 @@
package libcontainer
type NetworkInterface struct {
// Name is the name of the network interface.
Name string
RxBytes uint64
RxPackets uint64
RxErrors uint64
RxDropped uint64
TxBytes uint64
TxPackets uint64
TxErrors uint64
TxDropped uint64
}

View file

@ -0,0 +1,5 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -0,0 +1,8 @@
package libcontainer
import "github.com/opencontainers/runc/libcontainer/cgroups"
type Stats struct {
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
}

View file

@ -0,0 +1,7 @@
package libcontainer
// Solaris - TODO
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -0,0 +1,5 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View file

@ -0,0 +1,107 @@
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"github.com/opencontainers/runc/libcontainer/utils"
)
type syncType string
// Constants that are used for synchronisation between the parent and child
// during container setup. They come in pairs (with procError being a generic
// response which is followed by a &genericError).
//
// [ child ] <-> [ parent ]
//
// procHooks --> [run hooks]
// <-- procResume
//
// procConsole -->
// <-- procConsoleReq
// [send(fd)] --> [recv(fd)]
// <-- procConsoleAck
//
// procReady --> [final setup]
// <-- procRun
const (
procError syncType = "procError"
procReady syncType = "procReady"
procRun syncType = "procRun"
procHooks syncType = "procHooks"
procResume syncType = "procResume"
)
type syncT struct {
Type syncType `json:"type"`
}
// writeSync is used to write to a synchronisation pipe. An error is returned
// if there was a problem writing the payload.
func writeSync(pipe io.Writer, sync syncType) error {
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
return err
}
return nil
}
// readSync is used to read from a synchronisation pipe. An error is returned
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
func readSync(pipe io.Reader, expected syncType) error {
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type == procError {
var ierr genericError
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
return fmt.Errorf("failed reading error from parent: %v", err)
}
return &ierr
}
if procSync.Type != expected {
return fmt.Errorf("invalid synchronisation flag from parent")
}
}
return nil
}
// parseSync runs the given callback function on each syncT received from the
// child. It will return once io.EOF is returned from the given pipe.
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
dec := json.NewDecoder(pipe)
for {
var sync syncT
if err := dec.Decode(&sync); err != nil {
if err == io.EOF {
break
}
return err
}
// We handle this case outside fn for cleanliness reasons.
var ierr *genericError
if sync.Type == procError {
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
return ierr
}
// Programmer error.
panic("No error following JSON procError payload.")
}
if err := fn(&sync); err != nil {
return err
}
}
return nil
}

192
vendor/github.com/vishvananda/netlink/LICENSE generated vendored Normal file
View file

@ -0,0 +1,192 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2014 Vishvananda Ishaya.
Copyright 2014 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

91
vendor/github.com/vishvananda/netlink/README.md generated vendored Normal file
View file

@ -0,0 +1,91 @@
# netlink - netlink library for go #
[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
The netlink package provides a simple netlink library for go. Netlink
is the interface a user-space program in linux uses to communicate with
the kernel. It can be used to add and remove interfaces, set ip addresses
and routes, and configure ipsec. Netlink communication requires elevated
privileges, so in most cases this code needs to be run as root. Since
low-level netlink messages are inscrutable at best, the library attempts
to provide an api that is loosely modeled on the CLI provided by iproute2.
Actions like `ip link add` will be accomplished via a similarly named
function like AddLink(). This library began its life as a fork of the
netlink functionality in
[docker/libcontainer](https://github.com/docker/libcontainer) but was
heavily rewritten to improve testability, performance, and to add new
functionality like ipsec xfrm handling.
## Local Build and Test ##
You can use go get command:
go get github.com/vishvananda/netlink
Testing dependencies:
go get github.com/vishvananda/netns
Testing (requires root):
sudo -E go test github.com/vishvananda/netlink
## Examples ##
Add a new bridge and add eth1 into it:
```go
package main
import (
"fmt"
"github.com/vishvananda/netlink"
)
func main() {
la := netlink.NewLinkAttrs()
la.Name = "foo"
mybridge := &netlink.Bridge{LinkAttrs: la}
err := netlink.LinkAdd(mybridge)
if err != nil {
fmt.Printf("could not add %s: %v\n", la.Name, err)
}
eth1, _ := netlink.LinkByName("eth1")
netlink.LinkSetMaster(eth1, mybridge)
}
```
Note `NewLinkAttrs` constructor, it sets default values in structure. For now
it sets only `TxQLen` to `-1`, so kernel will set default by itself. If you're
using simple initialization(`LinkAttrs{Name: "foo"}`) `TxQLen` will be set to
`0` unless you specify it like `LinkAttrs{Name: "foo", TxQLen: 1000}`.
Add a new ip address to loopback:
```go
package main
import (
"github.com/vishvananda/netlink"
)
func main() {
lo, _ := netlink.LinkByName("lo")
addr, _ := netlink.ParseAddr("169.254.169.254/32")
netlink.AddrAdd(lo, addr)
}
```
## Future Work ##
Many pieces of netlink are not yet fully supported in the high-level
interface. Aspects of virtually all of the high-level objects don't exist.
Many of the underlying primitives are there, so its a matter of putting
the right fields into the high-level objects and making sure that they
are serialized and deserialized correctly in the Add and List methods.
There are also a few pieces of low level netlink functionality that still
need to be implemented. Routing rules are not in place and some of the
more advanced link types. Hopefully there is decent structure and testing
in place to make these fairly straightforward to add.

56
vendor/github.com/vishvananda/netlink/addr.go generated vendored Normal file
View file

@ -0,0 +1,56 @@
package netlink
import (
"fmt"
"net"
"strings"
)
// Addr represents an IP address from netlink. Netlink ip addresses
// include a mask, so it stores the address as a net.IPNet.
type Addr struct {
*net.IPNet
Label string
Flags int
Scope int
Peer *net.IPNet
Broadcast net.IP
PreferedLft int
ValidLft int
}
// String returns $ip/$netmask $label
func (a Addr) String() string {
return strings.TrimSpace(fmt.Sprintf("%s %s", a.IPNet, a.Label))
}
// ParseAddr parses the string representation of an address in the
// form $ip/$netmask $label. The label portion is optional
func ParseAddr(s string) (*Addr, error) {
label := ""
parts := strings.Split(s, " ")
if len(parts) > 1 {
s = parts[0]
label = parts[1]
}
m, err := ParseIPNet(s)
if err != nil {
return nil, err
}
return &Addr{IPNet: m, Label: label}, nil
}
// Equal returns true if both Addrs have the same net.IPNet value.
func (a Addr) Equal(x Addr) bool {
sizea, _ := a.Mask.Size()
sizeb, _ := x.Mask.Size()
// ignore label for comparison
return a.IP.Equal(x.IP) && sizea == sizeb
}
func (a Addr) PeerEqual(x Addr) bool {
sizea, _ := a.Peer.Mask.Size()
sizeb, _ := x.Peer.Mask.Size()
// ignore label for comparison
return a.Peer.IP.Equal(x.Peer.IP) && sizea == sizeb
}

288
vendor/github.com/vishvananda/netlink/addr_linux.go generated vendored Normal file
View file

@ -0,0 +1,288 @@
package netlink
import (
"fmt"
"log"
"net"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
// IFA_FLAGS is a u32 attribute.
const IFA_FLAGS = 0x8
// AddrAdd will add an IP address to a link device.
// Equivalent to: `ip addr add $addr dev $link`
func AddrAdd(link Link, addr *Addr) error {
return pkgHandle.AddrAdd(link, addr)
}
// AddrAdd will add an IP address to a link device.
// Equivalent to: `ip addr add $addr dev $link`
func (h *Handle) AddrAdd(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
// AddrReplace will replace (or, if not present, add) an IP address on a link device.
// Equivalent to: `ip addr replace $addr dev $link`
func AddrReplace(link Link, addr *Addr) error {
return pkgHandle.AddrReplace(link, addr)
}
// AddrReplace will replace (or, if not present, add) an IP address on a link device.
// Equivalent to: `ip addr replace $addr dev $link`
func (h *Handle) AddrReplace(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
// AddrDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func AddrDel(link Link, addr *Addr) error {
return pkgHandle.AddrDel(link, addr)
}
// AddrDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func (h *Handle) AddrDel(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error {
base := link.Attrs()
if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) {
return fmt.Errorf("label must begin with interface name")
}
h.ensureIndex(base)
family := nl.GetIPFamily(addr.IP)
msg := nl.NewIfAddrmsg(family)
msg.Index = uint32(base.Index)
msg.Scope = uint8(addr.Scope)
prefixlen, _ := addr.Mask.Size()
msg.Prefixlen = uint8(prefixlen)
req.AddData(msg)
var localAddrData []byte
if family == FAMILY_V4 {
localAddrData = addr.IP.To4()
} else {
localAddrData = addr.IP.To16()
}
localData := nl.NewRtAttr(syscall.IFA_LOCAL, localAddrData)
req.AddData(localData)
var peerAddrData []byte
if addr.Peer != nil {
if family == FAMILY_V4 {
peerAddrData = addr.Peer.IP.To4()
} else {
peerAddrData = addr.Peer.IP.To16()
}
} else {
peerAddrData = localAddrData
}
addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, peerAddrData)
req.AddData(addressData)
if addr.Flags != 0 {
if addr.Flags <= 0xff {
msg.IfAddrmsg.Flags = uint8(addr.Flags)
} else {
b := make([]byte, 4)
native.PutUint32(b, uint32(addr.Flags))
flagsData := nl.NewRtAttr(IFA_FLAGS, b)
req.AddData(flagsData)
}
}
if addr.Broadcast != nil {
req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast))
}
if addr.Label != "" {
labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
req.AddData(labelData)
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// AddrList gets a list of IP addresses in the system.
// Equivalent to: `ip addr show`.
// The list can be filtered by link and ip family.
func AddrList(link Link, family int) ([]Addr, error) {
return pkgHandle.AddrList(link, family)
}
// AddrList gets a list of IP addresses in the system.
// Equivalent to: `ip addr show`.
// The list can be filtered by link and ip family.
func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
req := h.newNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWADDR)
if err != nil {
return nil, err
}
indexFilter := 0
if link != nil {
base := link.Attrs()
h.ensureIndex(base)
indexFilter = base.Index
}
var res []Addr
for _, m := range msgs {
addr, msgFamily, ifindex, err := parseAddr(m)
if err != nil {
return res, err
}
if link != nil && ifindex != indexFilter {
// Ignore messages from other interfaces
continue
}
if family != FAMILY_ALL && msgFamily != family {
continue
}
res = append(res, addr)
}
return res, nil
}
func parseAddr(m []byte) (addr Addr, family, index int, err error) {
msg := nl.DeserializeIfAddrmsg(m)
family = -1
index = -1
attrs, err1 := nl.ParseRouteAttr(m[msg.Len():])
if err1 != nil {
err = err1
return
}
family = int(msg.Family)
index = int(msg.Index)
var local, dst *net.IPNet
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.IFA_ADDRESS:
dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
}
addr.Peer = dst
case syscall.IFA_LOCAL:
local = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
}
addr.IPNet = local
case syscall.IFA_BROADCAST:
addr.Broadcast = attr.Value
case syscall.IFA_LABEL:
addr.Label = string(attr.Value[:len(attr.Value)-1])
case IFA_FLAGS:
addr.Flags = int(native.Uint32(attr.Value[0:4]))
case nl.IFA_CACHEINFO:
ci := nl.DeserializeIfaCacheInfo(attr.Value)
addr.PreferedLft = int(ci.IfaPrefered)
addr.ValidLft = int(ci.IfaValid)
}
}
// IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS
if local != nil {
addr.IPNet = local
} else {
addr.IPNet = dst
}
addr.Scope = int(msg.Scope)
return
}
type AddrUpdate struct {
LinkAddress net.IPNet
LinkIndex int
Flags int
Scope int
PreferedLft int
ValidLft int
NewAddr bool // true=added false=deleted
}
// AddrSubscribe takes a chan down which notifications will be sent
// when addresses change. Close the 'done' chan to stop subscription.
func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
return addrSubscribe(netns.None(), netns.None(), ch, done)
}
// AddrSubscribeAt works like AddrSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
return addrSubscribe(ns, netns.None(), ch, done)
}
func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR)
if err != nil {
return err
}
if done != nil {
go func() {
<-done
s.Close()
}()
}
go func() {
defer close(ch)
for {
msgs, err := s.Receive()
if err != nil {
log.Printf("netlink.AddrSubscribe: Receive() error: %v", err)
return
}
for _, m := range msgs {
msgType := m.Header.Type
if msgType != syscall.RTM_NEWADDR && msgType != syscall.RTM_DELADDR {
log.Printf("netlink.AddrSubscribe: bad message type: %d", msgType)
continue
}
addr, _, ifindex, err := parseAddr(m.Data)
if err != nil {
log.Printf("netlink.AddrSubscribe: could not parse address: %v", err)
continue
}
ch <- AddrUpdate{LinkAddress: *addr.IPNet,
LinkIndex: ifindex,
NewAddr: msgType == syscall.RTM_NEWADDR,
Flags: addr.Flags,
Scope: addr.Scope,
PreferedLft: addr.PreferedLft,
ValidLft: addr.ValidLft}
}
}
}()
return nil
}

62
vendor/github.com/vishvananda/netlink/bpf_linux.go generated vendored Normal file
View file

@ -0,0 +1,62 @@
package netlink
/*
#include <asm/types.h>
#include <asm/unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
static int load_simple_bpf(int prog_type, int ret) {
#ifdef __NR_bpf
// { return ret; }
__u64 __attribute__((aligned(8))) insns[] = {
0x00000000000000b7ull | ((__u64)ret<<32),
0x0000000000000095ull,
};
__u8 __attribute__((aligned(8))) license[] = "ASL2";
// Copied from a header file since libc is notoriously slow to update.
// The call will succeed or fail and that will be our indication on
// whether or not it is supported.
struct {
__u32 prog_type;
__u32 insn_cnt;
__u64 insns;
__u64 license;
__u32 log_level;
__u32 log_size;
__u64 log_buf;
__u32 kern_version;
} __attribute__((aligned(8))) attr = {
.prog_type = prog_type,
.insn_cnt = 2,
.insns = (uintptr_t)&insns,
.license = (uintptr_t)&license,
};
return syscall(__NR_bpf, 5, &attr, sizeof(attr));
#else
errno = EINVAL;
return -1;
#endif
}
*/
import "C"
type BpfProgType C.int
const (
BPF_PROG_TYPE_UNSPEC BpfProgType = iota
BPF_PROG_TYPE_SOCKET_FILTER
BPF_PROG_TYPE_KPROBE
BPF_PROG_TYPE_SCHED_CLS
BPF_PROG_TYPE_SCHED_ACT
BPF_PROG_TYPE_TRACEPOINT
BPF_PROG_TYPE_XDP
)
// loadSimpleBpf loads a trivial bpf program for testing purposes
func loadSimpleBpf(progType BpfProgType, ret int) (int, error) {
fd, err := C.load_simple_bpf(C.int(progType), C.int(ret))
return int(fd), err
}

115
vendor/github.com/vishvananda/netlink/bridge_linux.go generated vendored Normal file
View file

@ -0,0 +1,115 @@
package netlink
import (
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// BridgeVlanList gets a map of device id to bridge vlan infos.
// Equivalent to: `bridge vlan show`
func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
return pkgHandle.BridgeVlanList()
}
// BridgeVlanList gets a map of device id to bridge vlan infos.
// Equivalent to: `bridge vlan show`
func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN))))
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK)
if err != nil {
return nil, err
}
ret := make(map[int32][]*nl.BridgeVlanInfo)
for _, m := range msgs {
msg := nl.DeserializeIfInfomsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.IFLA_AF_SPEC:
//nested attr
nestAttrs, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, fmt.Errorf("failed to parse nested attr %v", err)
}
for _, nestAttr := range nestAttrs {
switch nestAttr.Attr.Type {
case nl.IFLA_BRIDGE_VLAN_INFO:
vlanInfo := nl.DeserializeBridgeVlanInfo(nestAttr.Value)
ret[msg.Index] = append(ret[msg.Index], vlanInfo)
}
}
}
}
}
return ret, nil
}
// BridgeVlanAdd adds a new vlan filter entry
// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error {
return pkgHandle.BridgeVlanAdd(link, vid, pvid, untagged, self, master)
}
// BridgeVlanAdd adds a new vlan filter entry
// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error {
return h.bridgeVlanModify(syscall.RTM_SETLINK, link, vid, pvid, untagged, self, master)
}
// BridgeVlanDel adds a new vlan filter entry
// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error {
return pkgHandle.BridgeVlanDel(link, vid, pvid, untagged, self, master)
}
// BridgeVlanDel adds a new vlan filter entry
// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error {
return h.bridgeVlanModify(syscall.RTM_DELLINK, link, vid, pvid, untagged, self, master)
}
func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error {
base := link.Attrs()
h.ensureIndex(base)
req := h.newNetlinkRequest(cmd, syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
msg.Index = int32(base.Index)
req.AddData(msg)
br := nl.NewRtAttr(nl.IFLA_AF_SPEC, nil)
var flags uint16
if self {
flags |= nl.BRIDGE_FLAGS_SELF
}
if master {
flags |= nl.BRIDGE_FLAGS_MASTER
}
if flags > 0 {
nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags))
}
vlanInfo := &nl.BridgeVlanInfo{Vid: vid}
if pvid {
vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_PVID
}
if untagged {
vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED
}
nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
req.AddData(br)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
if err != nil {
return err
}
return nil
}

78
vendor/github.com/vishvananda/netlink/class.go generated vendored Normal file
View file

@ -0,0 +1,78 @@
package netlink
import (
"fmt"
)
type Class interface {
Attrs() *ClassAttrs
Type() string
}
// ClassAttrs represents a netlink class. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type ClassAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Leaf uint32
}
func (q ClassAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
}
type HtbClassAttrs struct {
// TODO handle all attributes
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (q HtbClassAttrs) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
// HtbClass represents an Htb class
type HtbClass struct {
ClassAttrs
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (q HtbClass) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
func (q *HtbClass) Attrs() *ClassAttrs {
return &q.ClassAttrs
}
func (q *HtbClass) Type() string {
return "htb"
}
// GenericClass classes represent types that are not currently understood
// by this netlink library.
type GenericClass struct {
ClassAttrs
ClassType string
}
func (class *GenericClass) Attrs() *ClassAttrs {
return &class.ClassAttrs
}
func (class *GenericClass) Type() string {
return class.ClassType
}

254
vendor/github.com/vishvananda/netlink/class_linux.go generated vendored Normal file
View file

@ -0,0 +1,254 @@
package netlink
import (
"errors"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// NOTE: function is in here because it uses other linux functions
func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
mtu := 1600
rate := cattrs.Rate / 8
ceil := cattrs.Ceil / 8
buffer := cattrs.Buffer
cbuffer := cattrs.Cbuffer
if ceil == 0 {
ceil = rate
}
if buffer == 0 {
buffer = uint32(float64(rate)/Hz() + float64(mtu))
}
buffer = uint32(Xmittime(rate, buffer))
if cbuffer == 0 {
cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
}
cbuffer = uint32(Xmittime(ceil, cbuffer))
return &HtbClass{
ClassAttrs: attrs,
Rate: rate,
Ceil: ceil,
Buffer: buffer,
Cbuffer: cbuffer,
Quantum: 10,
Level: 0,
Prio: 0,
}
}
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func ClassDel(class Class) error {
return pkgHandle.ClassDel(class)
}
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func (h *Handle) ClassDel(class Class) error {
return h.classModify(syscall.RTM_DELTCLASS, 0, class)
}
// ClassChange will change a class in place
// Equivalent to: `tc class change $class`
// The parent and handle MUST NOT be changed.
func ClassChange(class Class) error {
return pkgHandle.ClassChange(class)
}
// ClassChange will change a class in place
// Equivalent to: `tc class change $class`
// The parent and handle MUST NOT be changed.
func (h *Handle) ClassChange(class Class) error {
return h.classModify(syscall.RTM_NEWTCLASS, 0, class)
}
// ClassReplace will replace a class to the system.
// quivalent to: `tc class replace $class`
// The handle MAY be changed.
// If a class already exist with this parent/handle pair, the class is changed.
// If a class does not already exist with this parent/handle, a new class is created.
func ClassReplace(class Class) error {
return pkgHandle.ClassReplace(class)
}
// ClassReplace will replace a class to the system.
// quivalent to: `tc class replace $class`
// The handle MAY be changed.
// If a class already exist with this parent/handle pair, the class is changed.
// If a class does not already exist with this parent/handle, a new class is created.
func (h *Handle) ClassReplace(class Class) error {
return h.classModify(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE, class)
}
// ClassAdd will add a class to the system.
// Equivalent to: `tc class add $class`
func ClassAdd(class Class) error {
return pkgHandle.ClassAdd(class)
}
// ClassAdd will add a class to the system.
// Equivalent to: `tc class add $class`
func (h *Handle) ClassAdd(class Class) error {
return h.classModify(
syscall.RTM_NEWTCLASS,
syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
class,
)
}
func (h *Handle) classModify(cmd, flags int, class Class) error {
req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
if cmd != syscall.RTM_DELTCLASS {
if err := classPayload(req, class); err != nil {
return err
}
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
func classPayload(req *nl.NetlinkRequest, class Class) error {
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if htb, ok := class.(*HtbClass); ok {
opt := nl.TcHtbCopt{}
opt.Buffer = htb.Buffer
opt.Cbuffer = htb.Cbuffer
opt.Quantum = htb.Quantum
opt.Level = htb.Level
opt.Prio = htb.Prio
// TODO: Handle Debug properly. For now default to 0
/* Calculate {R,C}Tab and set Rate and Ceil */
cellLog := -1
ccellLog := -1
linklayer := nl.LINKLAYER_ETHERNET
mtu := 1600
var rtab [256]uint32
var ctab [256]uint32
tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)}
if CalcRtable(&tcrate, rtab, cellLog, uint32(mtu), linklayer) < 0 {
return errors.New("HTB: failed to calculate rate table")
}
opt.Rate = tcrate
tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)}
if CalcRtable(&tcceil, ctab, ccellLog, uint32(mtu), linklayer) < 0 {
return errors.New("HTB: failed to calculate ceil rate table")
}
opt.Ceil = tcceil
nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
nl.NewRtAttrChild(options, nl.TCA_HTB_RTAB, SerializeRtab(rtab))
nl.NewRtAttrChild(options, nl.TCA_HTB_CTAB, SerializeRtab(ctab))
}
req.AddData(options)
return nil
}
// ClassList gets a list of classes in the system.
// Equivalent to: `tc class show`.
// Generally returns nothing if link and parent are not specified.
func ClassList(link Link, parent uint32) ([]Class, error) {
return pkgHandle.ClassList(link, parent)
}
// ClassList gets a list of classes in the system.
// Equivalent to: `tc class show`.
// Generally returns nothing if link and parent are not specified.
func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
req := h.newNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
h.ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
if err != nil {
return nil, err
}
var res []Class
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := ClassAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
var class Class
classType := ""
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
classType = string(attr.Value[:len(attr.Value)-1])
switch classType {
case "htb":
class = &HtbClass{}
default:
class = &GenericClass{ClassType: classType}
}
case nl.TCA_OPTIONS:
switch classType {
case "htb":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
_, err = parseHtbClassData(class, data)
if err != nil {
return nil, err
}
}
}
}
*class.Attrs() = base
res = append(res, class)
}
return res, nil
}
func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
htb := class.(*HtbClass)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_HTB_PARMS:
opt := nl.DeserializeTcHtbCopt(datum.Value)
htb.Rate = uint64(opt.Rate.Rate)
htb.Ceil = uint64(opt.Ceil.Rate)
htb.Buffer = opt.Buffer
htb.Cbuffer = opt.Cbuffer
htb.Quantum = opt.Quantum
htb.Level = opt.Level
htb.Prio = opt.Prio
}
}
return detailed, nil
}

View file

@ -0,0 +1,371 @@
package netlink
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// ConntrackTableType Conntrack table for the netlink operation
type ConntrackTableType uint8
const (
// ConntrackTable Conntrack table
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK 1
ConntrackTable = 1
// ConntrackExpectTable Conntrack expect table
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
ConntrackExpectTable = 2
)
const (
// For Parsing Mark
TCP_PROTO = 6
UDP_PROTO = 17
)
const (
// backward compatibility with golang 1.6 which does not have io.SeekCurrent
seekCurrent = 1
)
// InetFamily Family type
type InetFamily uint8
// -L [table] [options] List conntrack or expectation table
// -G [table] parameters Get conntrack or expectation
// -I [table] parameters Create a conntrack or expectation
// -U [table] parameters Update a conntrack
// -E [table] [options] Show events
// -C [table] Show counter
// -S Show statistics
// ConntrackTableList returns the flow list of a table of a specific family
// conntrack -L [table] [options] List conntrack or expectation table
func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
return pkgHandle.ConntrackTableList(table, family)
}
// ConntrackTableFlush flushes all the flows of a specified table
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func ConntrackTableFlush(table ConntrackTableType) error {
return pkgHandle.ConntrackTableFlush(table)
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
// conntrack -D [table] parameters Delete conntrack or expectation
func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
return pkgHandle.ConntrackDeleteFilter(table, family, filter)
}
// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
// conntrack -L [table] [options] List conntrack or expectation table
func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
res, err := h.dumpConntrackTable(table, family)
if err != nil {
return nil, err
}
// Deserialize all the flows
var result []*ConntrackFlow
for _, dataRaw := range res {
result = append(result, parseRawData(dataRaw))
}
return result, nil
}
// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
return err
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
// conntrack -D [table] parameters Delete conntrack or expectation
func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
res, err := h.dumpConntrackTable(table, family)
if err != nil {
return 0, err
}
var matched uint
for _, dataRaw := range res {
flow := parseRawData(dataRaw)
if match := filter.MatchConntrackFlow(flow); match {
req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
req2.AddRawData(dataRaw[4:])
req2.Execute(syscall.NETLINK_NETFILTER, 0)
matched++
}
}
return matched, nil
}
func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
// Create the Netlink request object
req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
// Add the netfilter header
msg := &nl.Nfgenmsg{
NfgenFamily: uint8(family),
Version: nl.NFNETLINK_V0,
ResId: 0,
}
req.AddData(msg)
return req
}
func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
return req.Execute(syscall.NETLINK_NETFILTER, 0)
}
// The full conntrack flow structure is very complicated and can be found in the file:
// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
// For the time being, the structure below allows to parse and extract the base information of a flow
type ipTuple struct {
SrcIP net.IP
DstIP net.IP
Protocol uint8
SrcPort uint16
DstPort uint16
}
type ConntrackFlow struct {
FamilyType uint8
Forward ipTuple
Reverse ipTuple
Mark uint32
}
func (s *ConntrackFlow) String() string {
// conntrack cmd output:
// udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 mark=0
return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d mark=%d",
nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Mark)
}
// This method parse the ip tuple structure
// The message structure is the following:
// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
// <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
for i := 0; i < 2; i++ {
_, t, _, v := parseNfAttrTLV(reader)
switch t {
case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
tpl.SrcIP = v
case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
tpl.DstIP = v
}
}
// Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
reader.Seek(4, seekCurrent)
_, t, _, v := parseNfAttrTLV(reader)
if t == nl.CTA_PROTO_NUM {
tpl.Protocol = uint8(v[0])
}
// Skip some padding 3 bytes
reader.Seek(3, seekCurrent)
for i := 0; i < 2; i++ {
_, t, _ := parseNfAttrTL(reader)
switch t {
case nl.CTA_PROTO_SRC_PORT:
parseBERaw16(reader, &tpl.SrcPort)
case nl.CTA_PROTO_DST_PORT:
parseBERaw16(reader, &tpl.DstPort)
}
// Skip some padding 2 byte
reader.Seek(2, seekCurrent)
}
return tpl.Protocol
}
func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
isNested, attrType, len = parseNfAttrTL(r)
value = make([]byte, len)
binary.Read(r, binary.BigEndian, &value)
return isNested, attrType, len, value
}
func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
binary.Read(r, nl.NativeEndian(), &len)
len -= nl.SizeofNfattr
binary.Read(r, nl.NativeEndian(), &attrType)
isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
attrType = attrType & (nl.NLA_F_NESTED - 1)
return isNested, attrType, len
}
func parseBERaw16(r *bytes.Reader, v *uint16) {
binary.Read(r, binary.BigEndian, v)
}
func parseRawData(data []byte) *ConntrackFlow {
s := &ConntrackFlow{}
var proto uint8
// First there is the Nfgenmsg header
// consume only the family field
reader := bytes.NewReader(data)
binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
// skip rest of the Netfilter header
reader.Seek(3, seekCurrent)
// The message structure is the following:
// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
// flow information of the forward flow
// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
// flow information of the reverse flow
for reader.Len() > 0 {
nested, t, l := parseNfAttrTL(reader)
if nested && t == nl.CTA_TUPLE_ORIG {
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
proto = parseIpTuple(reader, &s.Forward)
}
} else if nested && t == nl.CTA_TUPLE_REPLY {
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
parseIpTuple(reader, &s.Reverse)
// Got all the useful information stop parsing
break
} else {
// Header not recognized skip it
reader.Seek(int64(l), seekCurrent)
}
}
}
if proto == TCP_PROTO {
reader.Seek(64, seekCurrent)
_, t, _, v := parseNfAttrTLV(reader)
if t == nl.CTA_MARK {
s.Mark = uint32(v[3])
}
} else if proto == UDP_PROTO {
reader.Seek(16, seekCurrent)
_, t, _, v := parseNfAttrTLV(reader)
if t == nl.CTA_MARK {
s.Mark = uint32(v[3])
}
}
return s
}
// Conntrack parameters and options:
// -n, --src-nat ip source NAT ip
// -g, --dst-nat ip destination NAT ip
// -j, --any-nat ip source or destination NAT ip
// -m, --mark mark Set mark
// -c, --secmark secmark Set selinux secmark
// -e, --event-mask eventmask Event mask, eg. NEW,DESTROY
// -z, --zero Zero counters while listing
// -o, --output type[,...] Output format, eg. xml
// -l, --label label[,...] conntrack labels
// Common parameters and options:
// -s, --src, --orig-src ip Source address from original direction
// -d, --dst, --orig-dst ip Destination address from original direction
// -r, --reply-src ip Source addres from reply direction
// -q, --reply-dst ip Destination address from reply direction
// -p, --protonum proto Layer 4 Protocol, eg. 'tcp'
// -f, --family proto Layer 3 Protocol, eg. 'ipv6'
// -t, --timeout timeout Set timeout
// -u, --status status Set status, eg. ASSURED
// -w, --zone value Set conntrack zone
// --orig-zone value Set zone for original direction
// --reply-zone value Set zone for reply direction
// -b, --buffer-size Netlink socket buffer size
// --mask-src ip Source mask address
// --mask-dst ip Destination mask address
// Filter types
type ConntrackFilterType uint8
const (
ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction
ConntrackOrigDstIP // -orig-dst ip Destination address from original direction
ConntrackNatSrcIP // -src-nat ip Source NAT ip
ConntrackNatDstIP // -dst-nat ip Destination NAT ip
ConntrackNatAnyIP // -any-nat ip Source or destination NAT ip
)
type CustomConntrackFilter interface {
// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches
// the filter or false otherwise
MatchConntrackFlow(flow *ConntrackFlow) bool
}
type ConntrackFilter struct {
ipFilter map[ConntrackFilterType]net.IP
}
// AddIP adds an IP to the conntrack filter
func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
if f.ipFilter == nil {
f.ipFilter = make(map[ConntrackFilterType]net.IP)
}
if _, ok := f.ipFilter[tp]; ok {
return errors.New("Filter attribute already present")
}
f.ipFilter[tp] = ip
return nil
}
// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
// false otherwise
func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
if len(f.ipFilter) == 0 {
// empty filter always not match
return false
}
match := true
// -orig-src ip Source address from original direction
if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
match = match && elem.Equal(flow.Forward.SrcIP)
}
// -orig-dst ip Destination address from original direction
if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
match = match && elem.Equal(flow.Forward.DstIP)
}
// -src-nat ip Source NAT ip
if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
match = match && elem.Equal(flow.Reverse.SrcIP)
}
// -dst-nat ip Destination NAT ip
if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
match = match && elem.Equal(flow.Reverse.DstIP)
}
// -any-nat ip Source or destination NAT ip
if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
}
return match
}
var _ CustomConntrackFilter = (*ConntrackFilter)(nil)

View file

@ -0,0 +1,53 @@
// +build !linux
package netlink
// ConntrackTableType Conntrack table for the netlink operation
type ConntrackTableType uint8
// InetFamily Family type
type InetFamily uint8
// ConntrackFlow placeholder
type ConntrackFlow struct{}
// ConntrackFilter placeholder
type ConntrackFilter struct{}
// ConntrackTableList returns the flow list of a table of a specific family
// conntrack -L [table] [options] List conntrack or expectation table
func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
return nil, ErrNotImplemented
}
// ConntrackTableFlush flushes all the flows of a specified table
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func ConntrackTableFlush(table ConntrackTableType) error {
return ErrNotImplemented
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
// conntrack -D [table] parameters Delete conntrack or expectation
func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
return 0, ErrNotImplemented
}
// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
// conntrack -L [table] [options] List conntrack or expectation table
func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
return nil, ErrNotImplemented
}
// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
return ErrNotImplemented
}
// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
// conntrack -D [table] parameters Delete conntrack or expectation
func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
return 0, ErrNotImplemented
}

283
vendor/github.com/vishvananda/netlink/filter.go generated vendored Normal file
View file

@ -0,0 +1,283 @@
package netlink
import (
"fmt"
"github.com/vishvananda/netlink/nl"
)
type Filter interface {
Attrs() *FilterAttrs
Type() string
}
// FilterAttrs represents a netlink filter. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type FilterAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Priority uint16 // lower is higher priority
Protocol uint16 // syscall.ETH_P_*
}
func (q FilterAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
}
type TcAct int32
const (
TC_ACT_UNSPEC TcAct = -1
TC_ACT_OK TcAct = 0
TC_ACT_RECLASSIFY TcAct = 1
TC_ACT_SHOT TcAct = 2
TC_ACT_PIPE TcAct = 3
TC_ACT_STOLEN TcAct = 4
TC_ACT_QUEUED TcAct = 5
TC_ACT_REPEAT TcAct = 6
TC_ACT_REDIRECT TcAct = 7
TC_ACT_JUMP TcAct = 0x10000000
)
func (a TcAct) String() string {
switch a {
case TC_ACT_UNSPEC:
return "unspec"
case TC_ACT_OK:
return "ok"
case TC_ACT_RECLASSIFY:
return "reclassify"
case TC_ACT_SHOT:
return "shot"
case TC_ACT_PIPE:
return "pipe"
case TC_ACT_STOLEN:
return "stolen"
case TC_ACT_QUEUED:
return "queued"
case TC_ACT_REPEAT:
return "repeat"
case TC_ACT_REDIRECT:
return "redirect"
case TC_ACT_JUMP:
return "jump"
}
return fmt.Sprintf("0x%x", int32(a))
}
type TcPolAct int32
const (
TC_POLICE_UNSPEC TcPolAct = TcPolAct(TC_ACT_UNSPEC)
TC_POLICE_OK TcPolAct = TcPolAct(TC_ACT_OK)
TC_POLICE_RECLASSIFY TcPolAct = TcPolAct(TC_ACT_RECLASSIFY)
TC_POLICE_SHOT TcPolAct = TcPolAct(TC_ACT_SHOT)
TC_POLICE_PIPE TcPolAct = TcPolAct(TC_ACT_PIPE)
)
func (a TcPolAct) String() string {
switch a {
case TC_POLICE_UNSPEC:
return "unspec"
case TC_POLICE_OK:
return "ok"
case TC_POLICE_RECLASSIFY:
return "reclassify"
case TC_POLICE_SHOT:
return "shot"
case TC_POLICE_PIPE:
return "pipe"
}
return fmt.Sprintf("0x%x", int32(a))
}
type ActionAttrs struct {
Index int
Capab int
Action TcAct
Refcnt int
Bindcnt int
}
func (q ActionAttrs) String() string {
return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt)
}
// Action represents an action in any supported filter.
type Action interface {
Attrs() *ActionAttrs
Type() string
}
type GenericAction struct {
ActionAttrs
}
func (action *GenericAction) Type() string {
return "generic"
}
func (action *GenericAction) Attrs() *ActionAttrs {
return &action.ActionAttrs
}
type BpfAction struct {
ActionAttrs
Fd int
Name string
}
func (action *BpfAction) Type() string {
return "bpf"
}
func (action *BpfAction) Attrs() *ActionAttrs {
return &action.ActionAttrs
}
type MirredAct uint8
func (a MirredAct) String() string {
switch a {
case TCA_EGRESS_REDIR:
return "egress redir"
case TCA_EGRESS_MIRROR:
return "egress mirror"
case TCA_INGRESS_REDIR:
return "ingress redir"
case TCA_INGRESS_MIRROR:
return "ingress mirror"
}
return "unknown"
}
const (
TCA_EGRESS_REDIR MirredAct = 1 /* packet redirect to EGRESS*/
TCA_EGRESS_MIRROR MirredAct = 2 /* mirror packet to EGRESS */
TCA_INGRESS_REDIR MirredAct = 3 /* packet redirect to INGRESS*/
TCA_INGRESS_MIRROR MirredAct = 4 /* mirror packet to INGRESS */
)
type MirredAction struct {
ActionAttrs
MirredAction MirredAct
Ifindex int
}
func (action *MirredAction) Type() string {
return "mirred"
}
func (action *MirredAction) Attrs() *ActionAttrs {
return &action.ActionAttrs
}
func NewMirredAction(redirIndex int) *MirredAction {
return &MirredAction{
ActionAttrs: ActionAttrs{
Action: TC_ACT_STOLEN,
},
MirredAction: TCA_EGRESS_REDIR,
Ifindex: redirIndex,
}
}
// Constants used in TcU32Sel.Flags.
const (
TC_U32_TERMINAL = nl.TC_U32_TERMINAL
TC_U32_OFFSET = nl.TC_U32_OFFSET
TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
TC_U32_EAT = nl.TC_U32_EAT
)
// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
// nl.TcU32Sel with the appropriate endianness.
type TcU32Sel struct {
Flags uint8
Offshift uint8
Nkeys uint8
Pad uint8
Offmask uint16
Off uint16
Offoff int16
Hoff int16
Hmask uint32
Keys []TcU32Key
}
// TcU32Key contained of Sel in the U32 filters. This is the copy and the frontend
// representation of nl.TcU32Key. It is serialized into chanonical nl.TcU32Sel
// with the appropriate endianness.
type TcU32Key struct {
Mask uint32
Val uint32
Off int32
OffMask int32
}
// U32 filters on many packet related properties
type U32 struct {
FilterAttrs
ClassId uint32
RedirIndex int
Sel *TcU32Sel
Actions []Action
}
func (filter *U32) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *U32) Type() string {
return "u32"
}
type FilterFwAttrs struct {
ClassId uint32
InDev string
Mask uint32
Index uint32
Buffer uint32
Mtu uint32
Mpu uint16
Rate uint32
AvRate uint32
PeakRate uint32
Action TcPolAct
Overhead uint16
LinkLayer int
}
type BpfFilter struct {
FilterAttrs
ClassId uint32
Fd int
Name string
DirectAction bool
}
func (filter *BpfFilter) Type() string {
return "bpf"
}
func (filter *BpfFilter) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
// GenericFilter filters represent types that are not currently understood
// by this netlink library.
type GenericFilter struct {
FilterAttrs
FilterType string
}
func (filter *GenericFilter) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *GenericFilter) Type() string {
return filter.FilterType
}

591
vendor/github.com/vishvananda/netlink/filter_linux.go generated vendored Normal file
View file

@ -0,0 +1,591 @@
package netlink
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"syscall"
"unsafe"
"github.com/vishvananda/netlink/nl"
)
// Fw filter filters on firewall marks
// NOTE: this is in filter_linux because it refers to nl.TcPolice which
// is defined in nl/tc_linux.go
type Fw struct {
FilterAttrs
ClassId uint32
// TODO remove nl type from interface
Police nl.TcPolice
InDev string
// TODO Action
Mask uint32
AvRate uint32
Rtab [256]uint32
Ptab [256]uint32
}
func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
var rtab [256]uint32
var ptab [256]uint32
rcellLog := -1
pcellLog := -1
avrate := fattrs.AvRate / 8
police := nl.TcPolice{}
police.Rate.Rate = fattrs.Rate / 8
police.PeakRate.Rate = fattrs.PeakRate / 8
buffer := fattrs.Buffer
linklayer := nl.LINKLAYER_ETHERNET
if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
linklayer = fattrs.LinkLayer
}
police.Action = int32(fattrs.Action)
if police.Rate.Rate != 0 {
police.Rate.Mpu = fattrs.Mpu
police.Rate.Overhead = fattrs.Overhead
if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("TBF: failed to calculate rate table")
}
police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
}
police.Mtu = fattrs.Mtu
if police.PeakRate.Rate != 0 {
police.PeakRate.Mpu = fattrs.Mpu
police.PeakRate.Overhead = fattrs.Overhead
if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("POLICE: failed to calculate peak rate table")
}
}
return &Fw{
FilterAttrs: attrs,
ClassId: fattrs.ClassId,
InDev: fattrs.InDev,
Mask: fattrs.Mask,
Police: police,
AvRate: avrate,
Rtab: rtab,
Ptab: ptab,
}, nil
}
func (filter *Fw) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *Fw) Type() string {
return "fw"
}
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func FilterDel(filter Filter) error {
return pkgHandle.FilterDel(filter)
}
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func (h *Handle) FilterDel(filter Filter) error {
req := h.newNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterAdd will add a filter to the system.
// Equivalent to: `tc filter add $filter`
func FilterAdd(filter Filter) error {
return pkgHandle.FilterAdd(filter)
}
// FilterAdd will add a filter to the system.
// Equivalent to: `tc filter add $filter`
func (h *Handle) FilterAdd(filter Filter) error {
native = nl.NativeEndian()
req := h.newNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if u32, ok := filter.(*U32); ok {
// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel))
if sel == nil {
// match all
sel = &nl.TcU32Sel{
Nkeys: 1,
Flags: nl.TC_U32_TERMINAL,
}
sel.Keys = append(sel.Keys, nl.TcU32Key{})
}
if native != networkOrder {
// Copy TcU32Sel.
cSel := *sel
keys := make([]nl.TcU32Key, cap(sel.Keys))
copy(keys, sel.Keys)
cSel.Keys = keys
sel = &cSel
// Handle the endianness of attributes
sel.Offmask = native.Uint16(htons(sel.Offmask))
sel.Hmask = native.Uint32(htonl(sel.Hmask))
for i, key := range sel.Keys {
sel.Keys[i].Mask = native.Uint32(htonl(key.Mask))
sel.Keys[i].Val = native.Uint32(htonl(key.Val))
}
}
sel.Nkeys = uint8(len(sel.Keys))
nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
if u32.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
}
actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
// backwards compatibility
if u32.RedirIndex != 0 {
u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...)
}
if err := EncodeActions(actionsAttr, u32.Actions); err != nil {
return err
}
} else if fw, ok := filter.(*Fw); ok {
if fw.Mask != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.Mask)
nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
}
if fw.InDev != "" {
nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
}
if (fw.Police != nl.TcPolice{}) {
police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
if (fw.Police.Rate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Rtab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
}
if (fw.Police.PeakRate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Ptab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
}
}
if fw.ClassId != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.ClassId)
nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
}
} else if bpf, ok := filter.(*BpfFilter); ok {
var bpfFlags uint32
if bpf.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(bpf.ClassId))
}
if bpf.Fd >= 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(bpf.Fd))))
}
if bpf.Name != "" {
nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(bpf.Name))
}
if bpf.DirectAction {
bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT
}
nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags))
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterList gets a list of filters in the system.
// Equivalent to: `tc filter show`.
// Generally returns nothing if link and parent are not specified.
func FilterList(link Link, parent uint32) ([]Filter, error) {
return pkgHandle.FilterList(link, parent)
}
// FilterList gets a list of filters in the system.
// Equivalent to: `tc filter show`.
// Generally returns nothing if link and parent are not specified.
func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
h.ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
if err != nil {
return nil, err
}
var res []Filter
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := FilterAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
base.Priority, base.Protocol = MajorMinor(msg.Info)
base.Protocol = nl.Swap16(base.Protocol)
var filter Filter
filterType := ""
detailed := false
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
filterType = string(attr.Value[:len(attr.Value)-1])
switch filterType {
case "u32":
filter = &U32{}
case "fw":
filter = &Fw{}
case "bpf":
filter = &BpfFilter{}
default:
filter = &GenericFilter{FilterType: filterType}
}
case nl.TCA_OPTIONS:
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
switch filterType {
case "u32":
detailed, err = parseU32Data(filter, data)
if err != nil {
return nil, err
}
case "fw":
detailed, err = parseFwData(filter, data)
if err != nil {
return nil, err
}
case "bpf":
detailed, err = parseBpfData(filter, data)
if err != nil {
return nil, err
}
default:
detailed = true
}
}
}
// only return the detailed version of the filter
if detailed {
*filter.Attrs() = base
res = append(res, filter)
}
}
return res, nil
}
func toTcGen(attrs *ActionAttrs, tcgen *nl.TcGen) {
tcgen.Index = uint32(attrs.Index)
tcgen.Capab = uint32(attrs.Capab)
tcgen.Action = int32(attrs.Action)
tcgen.Refcnt = int32(attrs.Refcnt)
tcgen.Bindcnt = int32(attrs.Bindcnt)
}
func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
attrs.Index = int(tcgen.Index)
attrs.Capab = int(tcgen.Capab)
attrs.Action = TcAct(tcgen.Action)
attrs.Refcnt = int(tcgen.Refcnt)
attrs.Bindcnt = int(tcgen.Bindcnt)
}
func EncodeActions(attr *nl.RtAttr, actions []Action) error {
tabIndex := int(nl.TCA_ACT_TAB)
for _, action := range actions {
switch action := action.(type) {
default:
return fmt.Errorf("unknown action type %s", action.Type())
case *MirredAction:
table := nl.NewRtAttrChild(attr, tabIndex, nil)
tabIndex++
nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred"))
aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
mirred := nl.TcMirred{
Eaction: int32(action.MirredAction),
Ifindex: uint32(action.Ifindex),
}
toTcGen(action.Attrs(), &mirred.TcGen)
nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mirred.Serialize())
case *BpfAction:
table := nl.NewRtAttrChild(attr, tabIndex, nil)
tabIndex++
nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf"))
aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
gen := nl.TcGen{}
toTcGen(action.Attrs(), &gen)
nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_PARMS, gen.Serialize())
nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd)))
nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name))
case *GenericAction:
table := nl.NewRtAttrChild(attr, tabIndex, nil)
tabIndex++
nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("gact"))
aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
gen := nl.TcGen{}
toTcGen(action.Attrs(), &gen)
nl.NewRtAttrChild(aopts, nl.TCA_GACT_PARMS, gen.Serialize())
}
}
return nil
}
func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
var actions []Action
for _, table := range tables {
var action Action
var actionType string
aattrs, err := nl.ParseRouteAttr(table.Value)
if err != nil {
return nil, err
}
nextattr:
for _, aattr := range aattrs {
switch aattr.Attr.Type {
case nl.TCA_KIND:
actionType = string(aattr.Value[:len(aattr.Value)-1])
// only parse if the action is mirred or bpf
switch actionType {
case "mirred":
action = &MirredAction{}
case "bpf":
action = &BpfAction{}
case "gact":
action = &GenericAction{}
default:
break nextattr
}
case nl.TCA_OPTIONS:
adata, err := nl.ParseRouteAttr(aattr.Value)
if err != nil {
return nil, err
}
for _, adatum := range adata {
switch actionType {
case "mirred":
switch adatum.Attr.Type {
case nl.TCA_MIRRED_PARMS:
mirred := *nl.DeserializeTcMirred(adatum.Value)
toAttrs(&mirred.TcGen, action.Attrs())
action.(*MirredAction).ActionAttrs = ActionAttrs{}
action.(*MirredAction).Ifindex = int(mirred.Ifindex)
action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction)
}
case "bpf":
switch adatum.Attr.Type {
case nl.TCA_ACT_BPF_PARMS:
gen := *nl.DeserializeTcGen(adatum.Value)
toAttrs(&gen, action.Attrs())
case nl.TCA_ACT_BPF_FD:
action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4]))
case nl.TCA_ACT_BPF_NAME:
action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1])
}
case "gact":
switch adatum.Attr.Type {
case nl.TCA_GACT_PARMS:
gen := *nl.DeserializeTcGen(adatum.Value)
toAttrs(&gen, action.Attrs())
}
}
}
}
}
actions = append(actions, action)
}
return actions, nil
}
func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
u32 := filter.(*U32)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_U32_SEL:
detailed = true
sel := nl.DeserializeTcU32Sel(datum.Value)
u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel))
if native != networkOrder {
// Handle the endianness of attributes
u32.Sel.Offmask = native.Uint16(htons(sel.Offmask))
u32.Sel.Hmask = native.Uint32(htonl(sel.Hmask))
for i, key := range u32.Sel.Keys {
u32.Sel.Keys[i].Mask = native.Uint32(htonl(key.Mask))
u32.Sel.Keys[i].Val = native.Uint32(htonl(key.Val))
}
}
case nl.TCA_U32_ACT:
tables, err := nl.ParseRouteAttr(datum.Value)
if err != nil {
return detailed, err
}
u32.Actions, err = parseActions(tables)
if err != nil {
return detailed, err
}
for _, action := range u32.Actions {
if action, ok := action.(*MirredAction); ok {
u32.RedirIndex = int(action.Ifindex)
}
}
case nl.TCA_U32_CLASSID:
u32.ClassId = native.Uint32(datum.Value)
}
}
return detailed, nil
}
func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
fw := filter.(*Fw)
detailed := true
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_FW_MASK:
fw.Mask = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_CLASSID:
fw.ClassId = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_INDEV:
fw.InDev = string(datum.Value[:len(datum.Value)-1])
case nl.TCA_FW_POLICE:
adata, _ := nl.ParseRouteAttr(datum.Value)
for _, aattr := range adata {
switch aattr.Attr.Type {
case nl.TCA_POLICE_TBF:
fw.Police = *nl.DeserializeTcPolice(aattr.Value)
case nl.TCA_POLICE_RATE:
fw.Rtab = DeserializeRtab(aattr.Value)
case nl.TCA_POLICE_PEAKRATE:
fw.Ptab = DeserializeRtab(aattr.Value)
}
}
}
}
return detailed, nil
}
func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
bpf := filter.(*BpfFilter)
detailed := true
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_BPF_FD:
bpf.Fd = int(native.Uint32(datum.Value[0:4]))
case nl.TCA_BPF_NAME:
bpf.Name = string(datum.Value[:len(datum.Value)-1])
case nl.TCA_BPF_CLASSID:
bpf.ClassId = native.Uint32(datum.Value[0:4])
case nl.TCA_BPF_FLAGS:
flags := native.Uint32(datum.Value[0:4])
if (flags & nl.TCA_BPF_FLAG_ACT_DIRECT) != 0 {
bpf.DirectAction = true
}
}
}
return detailed, nil
}
func AlignToAtm(size uint) uint {
var linksize, cells int
cells = int(size / nl.ATM_CELL_PAYLOAD)
if (size % nl.ATM_CELL_PAYLOAD) > 0 {
cells++
}
linksize = cells * nl.ATM_CELL_SIZE
return uint(linksize)
}
func AdjustSize(sz uint, mpu uint, linklayer int) uint {
if sz < mpu {
sz = mpu
}
switch linklayer {
case nl.LINKLAYER_ATM:
return AlignToAtm(sz)
default:
return sz
}
}
func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cellLog int, mtu uint32, linklayer int) int {
bps := rate.Rate
mpu := rate.Mpu
var sz uint
if mtu == 0 {
mtu = 2047
}
if cellLog < 0 {
cellLog = 0
for (mtu >> uint(cellLog)) > 255 {
cellLog++
}
}
for i := 0; i < 256; i++ {
sz = AdjustSize(uint((i+1)<<uint32(cellLog)), uint(mpu), linklayer)
rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
}
rate.CellAlign = -1
rate.CellLog = uint8(cellLog)
rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
return cellLog
}
func DeserializeRtab(b []byte) [256]uint32 {
var rtab [256]uint32
native := nl.NativeEndian()
r := bytes.NewReader(b)
_ = binary.Read(r, native, &rtab)
return rtab
}
func SerializeRtab(rtab [256]uint32) []byte {
native := nl.NativeEndian()
var w bytes.Buffer
_ = binary.Write(&w, native, rtab)
return w.Bytes()
}

View file

@ -0,0 +1,167 @@
package netlink
import (
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
)
type GenlOp struct {
ID uint32
Flags uint32
}
type GenlMulticastGroup struct {
ID uint32
Name string
}
type GenlFamily struct {
ID uint16
HdrSize uint32
Name string
Version uint32
MaxAttr uint32
Ops []GenlOp
Groups []GenlMulticastGroup
}
func parseOps(b []byte) ([]GenlOp, error) {
attrs, err := nl.ParseRouteAttr(b)
if err != nil {
return nil, err
}
ops := make([]GenlOp, 0, len(attrs))
for _, a := range attrs {
nattrs, err := nl.ParseRouteAttr(a.Value)
if err != nil {
return nil, err
}
var op GenlOp
for _, na := range nattrs {
switch na.Attr.Type {
case nl.GENL_CTRL_ATTR_OP_ID:
op.ID = native.Uint32(na.Value)
case nl.GENL_CTRL_ATTR_OP_FLAGS:
op.Flags = native.Uint32(na.Value)
}
}
ops = append(ops, op)
}
return ops, nil
}
func parseMulticastGroups(b []byte) ([]GenlMulticastGroup, error) {
attrs, err := nl.ParseRouteAttr(b)
if err != nil {
return nil, err
}
groups := make([]GenlMulticastGroup, 0, len(attrs))
for _, a := range attrs {
nattrs, err := nl.ParseRouteAttr(a.Value)
if err != nil {
return nil, err
}
var g GenlMulticastGroup
for _, na := range nattrs {
switch na.Attr.Type {
case nl.GENL_CTRL_ATTR_MCAST_GRP_NAME:
g.Name = nl.BytesToString(na.Value)
case nl.GENL_CTRL_ATTR_MCAST_GRP_ID:
g.ID = native.Uint32(na.Value)
}
}
groups = append(groups, g)
}
return groups, nil
}
func (f *GenlFamily) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
for _, a := range attrs {
switch a.Attr.Type {
case nl.GENL_CTRL_ATTR_FAMILY_NAME:
f.Name = nl.BytesToString(a.Value)
case nl.GENL_CTRL_ATTR_FAMILY_ID:
f.ID = native.Uint16(a.Value)
case nl.GENL_CTRL_ATTR_VERSION:
f.Version = native.Uint32(a.Value)
case nl.GENL_CTRL_ATTR_HDRSIZE:
f.HdrSize = native.Uint32(a.Value)
case nl.GENL_CTRL_ATTR_MAXATTR:
f.MaxAttr = native.Uint32(a.Value)
case nl.GENL_CTRL_ATTR_OPS:
ops, err := parseOps(a.Value)
if err != nil {
return err
}
f.Ops = ops
case nl.GENL_CTRL_ATTR_MCAST_GROUPS:
groups, err := parseMulticastGroups(a.Value)
if err != nil {
return err
}
f.Groups = groups
}
}
return nil
}
func parseFamilies(msgs [][]byte) ([]*GenlFamily, error) {
families := make([]*GenlFamily, 0, len(msgs))
for _, m := range msgs {
attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
if err != nil {
return nil, err
}
family := &GenlFamily{}
if err := family.parseAttributes(attrs); err != nil {
return nil, err
}
families = append(families, family)
}
return families, nil
}
func (h *Handle) GenlFamilyList() ([]*GenlFamily, error) {
msg := &nl.Genlmsg{
Command: nl.GENL_CTRL_CMD_GETFAMILY,
Version: nl.GENL_CTRL_VERSION,
}
req := h.newNetlinkRequest(nl.GENL_ID_CTRL, syscall.NLM_F_DUMP)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
return parseFamilies(msgs)
}
func GenlFamilyList() ([]*GenlFamily, error) {
return pkgHandle.GenlFamilyList()
}
func (h *Handle) GenlFamilyGet(name string) (*GenlFamily, error) {
msg := &nl.Genlmsg{
Command: nl.GENL_CTRL_CMD_GETFAMILY,
Version: nl.GENL_CTRL_VERSION,
}
req := h.newNetlinkRequest(nl.GENL_ID_CTRL, 0)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_CTRL_ATTR_FAMILY_NAME, nl.ZeroTerminated(name)))
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
families, err := parseFamilies(msgs)
if len(families) != 1 {
return nil, fmt.Errorf("invalid response for GENL_CTRL_CMD_GETFAMILY")
}
return families[0], nil
}
func GenlFamilyGet(name string) (*GenlFamily, error) {
return pkgHandle.GenlFamilyGet(name)
}

View file

@ -0,0 +1,25 @@
// +build !linux
package netlink
type GenlOp struct{}
type GenlMulticastGroup struct{}
type GenlFamily struct{}
func (h *Handle) GenlFamilyList() ([]*GenlFamily, error) {
return nil, ErrNotImplemented
}
func GenlFamilyList() ([]*GenlFamily, error) {
return nil, ErrNotImplemented
}
func (h *Handle) GenlFamilyGet(name string) (*GenlFamily, error) {
return nil, ErrNotImplemented
}
func GenlFamilyGet(name string) (*GenlFamily, error) {
return nil, ErrNotImplemented
}

238
vendor/github.com/vishvananda/netlink/gtp_linux.go generated vendored Normal file
View file

@ -0,0 +1,238 @@
package netlink
import (
"fmt"
"net"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
)
type PDP struct {
Version uint32
TID uint64
PeerAddress net.IP
MSAddress net.IP
Flow uint16
NetNSFD uint32
ITEI uint32
OTEI uint32
}
func (pdp *PDP) String() string {
elems := []string{}
elems = append(elems, fmt.Sprintf("Version: %d", pdp.Version))
if pdp.Version == 0 {
elems = append(elems, fmt.Sprintf("TID: %d", pdp.TID))
} else if pdp.Version == 1 {
elems = append(elems, fmt.Sprintf("TEI: %d/%d", pdp.ITEI, pdp.OTEI))
}
elems = append(elems, fmt.Sprintf("MS-Address: %s", pdp.MSAddress))
elems = append(elems, fmt.Sprintf("Peer-Address: %s", pdp.PeerAddress))
return fmt.Sprintf("{%s}", strings.Join(elems, " "))
}
func (p *PDP) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
for _, a := range attrs {
switch a.Attr.Type {
case nl.GENL_GTP_ATTR_VERSION:
p.Version = native.Uint32(a.Value)
case nl.GENL_GTP_ATTR_TID:
p.TID = native.Uint64(a.Value)
case nl.GENL_GTP_ATTR_PEER_ADDRESS:
p.PeerAddress = net.IP(a.Value)
case nl.GENL_GTP_ATTR_MS_ADDRESS:
p.MSAddress = net.IP(a.Value)
case nl.GENL_GTP_ATTR_FLOW:
p.Flow = native.Uint16(a.Value)
case nl.GENL_GTP_ATTR_NET_NS_FD:
p.NetNSFD = native.Uint32(a.Value)
case nl.GENL_GTP_ATTR_I_TEI:
p.ITEI = native.Uint32(a.Value)
case nl.GENL_GTP_ATTR_O_TEI:
p.OTEI = native.Uint32(a.Value)
}
}
return nil
}
func parsePDP(msgs [][]byte) ([]*PDP, error) {
pdps := make([]*PDP, 0, len(msgs))
for _, m := range msgs {
attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
if err != nil {
return nil, err
}
pdp := &PDP{}
if err := pdp.parseAttributes(attrs); err != nil {
return nil, err
}
pdps = append(pdps, pdp)
}
return pdps, nil
}
func (h *Handle) GTPPDPList() ([]*PDP, error) {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return nil, err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_GETPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_DUMP)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
return parsePDP(msgs)
}
func GTPPDPList() ([]*PDP, error) {
return pkgHandle.GTPPDPList()
}
func gtpPDPGet(req *nl.NetlinkRequest) (*PDP, error) {
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
pdps, err := parsePDP(msgs)
if err != nil {
return nil, err
}
if len(pdps) != 1 {
return nil, fmt.Errorf("invalid reqponse for GENL_GTP_CMD_GETPDP")
}
return pdps[0], nil
}
func (h *Handle) GTPPDPByTID(link Link, tid int) (*PDP, error) {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return nil, err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_GETPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), 0)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(0)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_TID, nl.Uint64Attr(uint64(tid))))
return gtpPDPGet(req)
}
func GTPPDPByTID(link Link, tid int) (*PDP, error) {
return pkgHandle.GTPPDPByTID(link, tid)
}
func (h *Handle) GTPPDPByITEI(link Link, itei int) (*PDP, error) {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return nil, err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_GETPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), 0)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(1)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_I_TEI, nl.Uint32Attr(uint32(itei))))
return gtpPDPGet(req)
}
func GTPPDPByITEI(link Link, itei int) (*PDP, error) {
return pkgHandle.GTPPDPByITEI(link, itei)
}
func (h *Handle) GTPPDPByMSAddress(link Link, addr net.IP) (*PDP, error) {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return nil, err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_GETPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), 0)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(0)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_MS_ADDRESS, []byte(addr.To4())))
return gtpPDPGet(req)
}
func GTPPDPByMSAddress(link Link, addr net.IP) (*PDP, error) {
return pkgHandle.GTPPDPByMSAddress(link, addr)
}
func (h *Handle) GTPPDPAdd(link Link, pdp *PDP) error {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_NEWPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(pdp.Version)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_PEER_ADDRESS, []byte(pdp.PeerAddress.To4())))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_MS_ADDRESS, []byte(pdp.MSAddress.To4())))
switch pdp.Version {
case 0:
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_TID, nl.Uint64Attr(pdp.TID)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_FLOW, nl.Uint16Attr(pdp.Flow)))
case 1:
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_I_TEI, nl.Uint32Attr(pdp.ITEI)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_O_TEI, nl.Uint32Attr(pdp.OTEI)))
default:
return fmt.Errorf("unsupported GTP version: %d", pdp.Version)
}
_, err = req.Execute(syscall.NETLINK_GENERIC, 0)
return err
}
func GTPPDPAdd(link Link, pdp *PDP) error {
return pkgHandle.GTPPDPAdd(link, pdp)
}
func (h *Handle) GTPPDPDel(link Link, pdp *PDP) error {
f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
if err != nil {
return err
}
msg := &nl.Genlmsg{
Command: nl.GENL_GTP_CMD_DELPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(pdp.Version)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
switch pdp.Version {
case 0:
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_TID, nl.Uint64Attr(pdp.TID)))
case 1:
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_I_TEI, nl.Uint32Attr(pdp.ITEI)))
default:
return fmt.Errorf("unsupported GTP version: %d", pdp.Version)
}
_, err = req.Execute(syscall.NETLINK_GENERIC, 0)
return err
}
func GTPPDPDel(link Link, pdp *PDP) error {
return pkgHandle.GTPPDPDel(link, pdp)
}

111
vendor/github.com/vishvananda/netlink/handle_linux.go generated vendored Normal file
View file

@ -0,0 +1,111 @@
package netlink
import (
"fmt"
"syscall"
"time"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
// Empty handle used by the netlink package methods
var pkgHandle = &Handle{}
// Handle is an handle for the netlink requests on a
// specific network namespace. All the requests on the
// same netlink family share the same netlink socket,
// which gets released when the handle is deleted.
type Handle struct {
sockets map[int]*nl.SocketHandle
lookupByDump bool
}
// SupportsNetlinkFamily reports whether the passed netlink family is supported by this Handle
func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
_, ok := h.sockets[nlFamily]
return ok
}
// NewHandle returns a netlink handle on the current network namespace.
// Caller may specify the netlink families the handle should support.
// If no families are specified, all the families the netlink package
// supports will be automatically added.
func NewHandle(nlFamilies ...int) (*Handle, error) {
return newHandle(netns.None(), netns.None(), nlFamilies...)
}
// SetSocketTimeout sets the send and receive timeout for each socket in the
// netlink handle. Although the socket timeout has granularity of one
// microsecond, the effective granularity is floored by the kernel timer tick,
// which default value is four milliseconds.
func (h *Handle) SetSocketTimeout(to time.Duration) error {
if to < time.Microsecond {
return fmt.Errorf("invalid timeout, minimul value is %s", time.Microsecond)
}
tv := syscall.NsecToTimeval(to.Nanoseconds())
for _, sh := range h.sockets {
fd := sh.Socket.GetFd()
err := syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, &tv)
if err != nil {
return err
}
err = syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, &tv)
if err != nil {
return err
}
}
return nil
}
// NewHandle returns a netlink handle on the network namespace
// specified by ns. If ns=netns.None(), current network namespace
// will be assumed
func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
return newHandle(ns, netns.None(), nlFamilies...)
}
// NewHandleAtFrom works as NewHandle but allows client to specify the
// new and the origin netns Handle.
func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
return newHandle(newNs, curNs)
}
func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error) {
h := &Handle{sockets: map[int]*nl.SocketHandle{}}
fams := nl.SupportedNlFamilies
if len(nlFamilies) != 0 {
fams = nlFamilies
}
for _, f := range fams {
s, err := nl.GetNetlinkSocketAt(newNs, curNs, f)
if err != nil {
return nil, err
}
h.sockets[f] = &nl.SocketHandle{Socket: s}
}
return h, nil
}
// Delete releases the resources allocated to this handle
func (h *Handle) Delete() {
for _, sh := range h.sockets {
sh.Close()
}
h.sockets = nil
}
func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
// Do this so that package API still use nl package variable nextSeqNr
if h.sockets == nil {
return nl.NewNetlinkRequest(proto, flags)
}
return &nl.NetlinkRequest{
NlMsghdr: syscall.NlMsghdr{
Len: uint32(syscall.SizeofNlMsghdr),
Type: uint16(proto),
Flags: syscall.NLM_F_REQUEST | uint16(flags),
},
Sockets: h.sockets,
}
}

View file

@ -0,0 +1,218 @@
// +build !linux
package netlink
import (
"net"
"time"
"github.com/vishvananda/netns"
)
type Handle struct{}
func NewHandle(nlFamilies ...int) (*Handle, error) {
return nil, ErrNotImplemented
}
func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
return nil, ErrNotImplemented
}
func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
return nil, ErrNotImplemented
}
func (h *Handle) Delete() {}
func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
return false
}
func (h *Handle) SetSocketTimeout(to time.Duration) error {
return ErrNotImplemented
}
func (h *Handle) SetPromiscOn(link Link) error {
return ErrNotImplemented
}
func (h *Handle) SetPromiscOff(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetUp(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetDown(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetMTU(link Link, mtu int) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetName(link Link, name string) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetAlias(link Link, name string) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetNoMaster(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetMasterByIndex(link Link, masterIndex int) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetNsPid(link Link, nspid int) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetNsFd(link Link, fd int) error {
return ErrNotImplemented
}
func (h *Handle) LinkAdd(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkDel(link Link) error {
return ErrNotImplemented
}
func (h *Handle) LinkByName(name string) (Link, error) {
return nil, ErrNotImplemented
}
func (h *Handle) LinkByAlias(alias string) (Link, error) {
return nil, ErrNotImplemented
}
func (h *Handle) LinkByIndex(index int) (Link, error) {
return nil, ErrNotImplemented
}
func (h *Handle) LinkList() ([]Link, error) {
return nil, ErrNotImplemented
}
func (h *Handle) LinkSetHairpin(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetGuard(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetFastLeave(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetLearning(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetRootBlock(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) LinkSetFlood(link Link, mode bool) error {
return ErrNotImplemented
}
func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
return ErrNotImplemented
}
func (h *Handle) AddrAdd(link Link, addr *Addr) error {
return ErrNotImplemented
}
func (h *Handle) AddrDel(link Link, addr *Addr) error {
return ErrNotImplemented
}
func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
return nil, ErrNotImplemented
}
func (h *Handle) ClassDel(class Class) error {
return ErrNotImplemented
}
func (h *Handle) ClassChange(class Class) error {
return ErrNotImplemented
}
func (h *Handle) ClassReplace(class Class) error {
return ErrNotImplemented
}
func (h *Handle) ClassAdd(class Class) error {
return ErrNotImplemented
}
func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
return nil, ErrNotImplemented
}
func (h *Handle) FilterDel(filter Filter) error {
return ErrNotImplemented
}
func (h *Handle) FilterAdd(filter Filter) error {
return ErrNotImplemented
}
func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
return nil, ErrNotImplemented
}
func (h *Handle) NeighAdd(neigh *Neigh) error {
return ErrNotImplemented
}
func (h *Handle) NeighSet(neigh *Neigh) error {
return ErrNotImplemented
}
func (h *Handle) NeighAppend(neigh *Neigh) error {
return ErrNotImplemented
}
func (h *Handle) NeighDel(neigh *Neigh) error {
return ErrNotImplemented
}
func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
return nil, ErrNotImplemented
}
func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
return nil, ErrNotImplemented
}

776
vendor/github.com/vishvananda/netlink/link.go generated vendored Normal file
View file

@ -0,0 +1,776 @@
package netlink
import (
"fmt"
"net"
)
// Link represents a link device from netlink. Shared link attributes
// like name may be retrieved using the Attrs() method. Unique data
// can be retrieved by casting the object to the proper type.
type Link interface {
Attrs() *LinkAttrs
Type() string
}
type (
NsPid int
NsFd int
)
// LinkAttrs represents data shared by most link types
type LinkAttrs struct {
Index int
MTU int
TxQLen int // Transmit Queue Length
Name string
HardwareAddr net.HardwareAddr
Flags net.Flags
RawFlags uint32
ParentIndex int // index of the parent link device
MasterIndex int // must be the index of a bridge
Namespace interface{} // nil | NsPid | NsFd
Alias string
Statistics *LinkStatistics
Promisc int
Xdp *LinkXdp
EncapType string
Protinfo *Protinfo
OperState LinkOperState
}
// LinkOperState represents the values of the IFLA_OPERSTATE link
// attribute, which contains the RFC2863 state of the interface.
type LinkOperState uint8
const (
OperUnknown = iota // Status can't be determined.
OperNotPresent // Some component is missing.
OperDown // Down.
OperLowerLayerDown // Down due to state of lower layer.
OperTesting // In some test mode.
OperDormant // Not up but pending an external event.
OperUp // Up, ready to send packets.
)
func (s LinkOperState) String() string {
switch s {
case OperNotPresent:
return "not-present"
case OperDown:
return "down"
case OperLowerLayerDown:
return "lower-layer-down"
case OperTesting:
return "testing"
case OperDormant:
return "dormant"
case OperUp:
return "up"
default:
return "unknown"
}
}
// NewLinkAttrs returns LinkAttrs structure filled with default values
func NewLinkAttrs() LinkAttrs {
return LinkAttrs{
TxQLen: -1,
}
}
type LinkStatistics LinkStatistics64
/*
Ref: struct rtnl_link_stats {...}
*/
type LinkStatistics32 struct {
RxPackets uint32
TxPackets uint32
RxBytes uint32
TxBytes uint32
RxErrors uint32
TxErrors uint32
RxDropped uint32
TxDropped uint32
Multicast uint32
Collisions uint32
RxLengthErrors uint32
RxOverErrors uint32
RxCrcErrors uint32
RxFrameErrors uint32
RxFifoErrors uint32
RxMissedErrors uint32
TxAbortedErrors uint32
TxCarrierErrors uint32
TxFifoErrors uint32
TxHeartbeatErrors uint32
TxWindowErrors uint32
RxCompressed uint32
TxCompressed uint32
}
func (s32 LinkStatistics32) to64() *LinkStatistics64 {
return &LinkStatistics64{
RxPackets: uint64(s32.RxPackets),
TxPackets: uint64(s32.TxPackets),
RxBytes: uint64(s32.RxBytes),
TxBytes: uint64(s32.TxBytes),
RxErrors: uint64(s32.RxErrors),
TxErrors: uint64(s32.TxErrors),
RxDropped: uint64(s32.RxDropped),
TxDropped: uint64(s32.TxDropped),
Multicast: uint64(s32.Multicast),
Collisions: uint64(s32.Collisions),
RxLengthErrors: uint64(s32.RxLengthErrors),
RxOverErrors: uint64(s32.RxOverErrors),
RxCrcErrors: uint64(s32.RxCrcErrors),
RxFrameErrors: uint64(s32.RxFrameErrors),
RxFifoErrors: uint64(s32.RxFifoErrors),
RxMissedErrors: uint64(s32.RxMissedErrors),
TxAbortedErrors: uint64(s32.TxAbortedErrors),
TxCarrierErrors: uint64(s32.TxCarrierErrors),
TxFifoErrors: uint64(s32.TxFifoErrors),
TxHeartbeatErrors: uint64(s32.TxHeartbeatErrors),
TxWindowErrors: uint64(s32.TxWindowErrors),
RxCompressed: uint64(s32.RxCompressed),
TxCompressed: uint64(s32.TxCompressed),
}
}
/*
Ref: struct rtnl_link_stats64 {...}
*/
type LinkStatistics64 struct {
RxPackets uint64
TxPackets uint64
RxBytes uint64
TxBytes uint64
RxErrors uint64
TxErrors uint64
RxDropped uint64
TxDropped uint64
Multicast uint64
Collisions uint64
RxLengthErrors uint64
RxOverErrors uint64
RxCrcErrors uint64
RxFrameErrors uint64
RxFifoErrors uint64
RxMissedErrors uint64
TxAbortedErrors uint64
TxCarrierErrors uint64
TxFifoErrors uint64
TxHeartbeatErrors uint64
TxWindowErrors uint64
RxCompressed uint64
TxCompressed uint64
}
type LinkXdp struct {
Fd int
Attached bool
Flags uint32
ProgId uint32
}
// Device links cannot be created via netlink. These links
// are links created by udev like 'lo' and 'etho0'
type Device struct {
LinkAttrs
}
func (device *Device) Attrs() *LinkAttrs {
return &device.LinkAttrs
}
func (device *Device) Type() string {
return "device"
}
// Dummy links are dummy ethernet devices
type Dummy struct {
LinkAttrs
}
func (dummy *Dummy) Attrs() *LinkAttrs {
return &dummy.LinkAttrs
}
func (dummy *Dummy) Type() string {
return "dummy"
}
// Ifb links are advanced dummy devices for packet filtering
type Ifb struct {
LinkAttrs
}
func (ifb *Ifb) Attrs() *LinkAttrs {
return &ifb.LinkAttrs
}
func (ifb *Ifb) Type() string {
return "ifb"
}
// Bridge links are simple linux bridges
type Bridge struct {
LinkAttrs
MulticastSnooping *bool
HelloTime *uint32
}
func (bridge *Bridge) Attrs() *LinkAttrs {
return &bridge.LinkAttrs
}
func (bridge *Bridge) Type() string {
return "bridge"
}
// Vlan links have ParentIndex set in their Attrs()
type Vlan struct {
LinkAttrs
VlanId int
}
func (vlan *Vlan) Attrs() *LinkAttrs {
return &vlan.LinkAttrs
}
func (vlan *Vlan) Type() string {
return "vlan"
}
type MacvlanMode uint16
const (
MACVLAN_MODE_DEFAULT MacvlanMode = iota
MACVLAN_MODE_PRIVATE
MACVLAN_MODE_VEPA
MACVLAN_MODE_BRIDGE
MACVLAN_MODE_PASSTHRU
MACVLAN_MODE_SOURCE
)
// Macvlan links have ParentIndex set in their Attrs()
type Macvlan struct {
LinkAttrs
Mode MacvlanMode
}
func (macvlan *Macvlan) Attrs() *LinkAttrs {
return &macvlan.LinkAttrs
}
func (macvlan *Macvlan) Type() string {
return "macvlan"
}
// Macvtap - macvtap is a virtual interfaces based on macvlan
type Macvtap struct {
Macvlan
}
func (macvtap Macvtap) Type() string {
return "macvtap"
}
type TuntapMode uint16
type TuntapFlag uint16
// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
type Tuntap struct {
LinkAttrs
Mode TuntapMode
Flags TuntapFlag
}
func (tuntap *Tuntap) Attrs() *LinkAttrs {
return &tuntap.LinkAttrs
}
func (tuntap *Tuntap) Type() string {
return "tuntap"
}
// Veth devices must specify PeerName on create
type Veth struct {
LinkAttrs
PeerName string // veth on create only
}
func (veth *Veth) Attrs() *LinkAttrs {
return &veth.LinkAttrs
}
func (veth *Veth) Type() string {
return "veth"
}
// GenericLink links represent types that are not currently understood
// by this netlink library.
type GenericLink struct {
LinkAttrs
LinkType string
}
func (generic *GenericLink) Attrs() *LinkAttrs {
return &generic.LinkAttrs
}
func (generic *GenericLink) Type() string {
return generic.LinkType
}
type Vxlan struct {
LinkAttrs
VxlanId int
VtepDevIndex int
SrcAddr net.IP
Group net.IP
TTL int
TOS int
Learning bool
Proxy bool
RSC bool
L2miss bool
L3miss bool
UDPCSum bool
NoAge bool
GBP bool
FlowBased bool
Age int
Limit int
Port int
PortLow int
PortHigh int
}
func (vxlan *Vxlan) Attrs() *LinkAttrs {
return &vxlan.LinkAttrs
}
func (vxlan *Vxlan) Type() string {
return "vxlan"
}
type IPVlanMode uint16
const (
IPVLAN_MODE_L2 IPVlanMode = iota
IPVLAN_MODE_L3
IPVLAN_MODE_L3S
IPVLAN_MODE_MAX
)
type IPVlan struct {
LinkAttrs
Mode IPVlanMode
}
func (ipvlan *IPVlan) Attrs() *LinkAttrs {
return &ipvlan.LinkAttrs
}
func (ipvlan *IPVlan) Type() string {
return "ipvlan"
}
// BondMode type
type BondMode int
func (b BondMode) String() string {
s, ok := bondModeToString[b]
if !ok {
return fmt.Sprintf("BondMode(%d)", b)
}
return s
}
// StringToBondMode returns bond mode, or uknonw is the s is invalid.
func StringToBondMode(s string) BondMode {
mode, ok := StringToBondModeMap[s]
if !ok {
return BOND_MODE_UNKNOWN
}
return mode
}
// Possible BondMode
const (
BOND_MODE_BALANCE_RR BondMode = iota
BOND_MODE_ACTIVE_BACKUP
BOND_MODE_BALANCE_XOR
BOND_MODE_BROADCAST
BOND_MODE_802_3AD
BOND_MODE_BALANCE_TLB
BOND_MODE_BALANCE_ALB
BOND_MODE_UNKNOWN
)
var bondModeToString = map[BondMode]string{
BOND_MODE_BALANCE_RR: "balance-rr",
BOND_MODE_ACTIVE_BACKUP: "active-backup",
BOND_MODE_BALANCE_XOR: "balance-xor",
BOND_MODE_BROADCAST: "broadcast",
BOND_MODE_802_3AD: "802.3ad",
BOND_MODE_BALANCE_TLB: "balance-tlb",
BOND_MODE_BALANCE_ALB: "balance-alb",
}
var StringToBondModeMap = map[string]BondMode{
"balance-rr": BOND_MODE_BALANCE_RR,
"active-backup": BOND_MODE_ACTIVE_BACKUP,
"balance-xor": BOND_MODE_BALANCE_XOR,
"broadcast": BOND_MODE_BROADCAST,
"802.3ad": BOND_MODE_802_3AD,
"balance-tlb": BOND_MODE_BALANCE_TLB,
"balance-alb": BOND_MODE_BALANCE_ALB,
}
// BondArpValidate type
type BondArpValidate int
// Possible BondArpValidate value
const (
BOND_ARP_VALIDATE_NONE BondArpValidate = iota
BOND_ARP_VALIDATE_ACTIVE
BOND_ARP_VALIDATE_BACKUP
BOND_ARP_VALIDATE_ALL
)
// BondPrimaryReselect type
type BondPrimaryReselect int
// Possible BondPrimaryReselect value
const (
BOND_PRIMARY_RESELECT_ALWAYS BondPrimaryReselect = iota
BOND_PRIMARY_RESELECT_BETTER
BOND_PRIMARY_RESELECT_FAILURE
)
// BondArpAllTargets type
type BondArpAllTargets int
// Possible BondArpAllTargets value
const (
BOND_ARP_ALL_TARGETS_ANY BondArpAllTargets = iota
BOND_ARP_ALL_TARGETS_ALL
)
// BondFailOverMac type
type BondFailOverMac int
// Possible BondFailOverMac value
const (
BOND_FAIL_OVER_MAC_NONE BondFailOverMac = iota
BOND_FAIL_OVER_MAC_ACTIVE
BOND_FAIL_OVER_MAC_FOLLOW
)
// BondXmitHashPolicy type
type BondXmitHashPolicy int
func (b BondXmitHashPolicy) String() string {
s, ok := bondXmitHashPolicyToString[b]
if !ok {
return fmt.Sprintf("XmitHashPolicy(%d)", b)
}
return s
}
// StringToBondXmitHashPolicy returns bond lacp arte, or uknonw is the s is invalid.
func StringToBondXmitHashPolicy(s string) BondXmitHashPolicy {
lacp, ok := StringToBondXmitHashPolicyMap[s]
if !ok {
return BOND_XMIT_HASH_POLICY_UNKNOWN
}
return lacp
}
// Possible BondXmitHashPolicy value
const (
BOND_XMIT_HASH_POLICY_LAYER2 BondXmitHashPolicy = iota
BOND_XMIT_HASH_POLICY_LAYER3_4
BOND_XMIT_HASH_POLICY_LAYER2_3
BOND_XMIT_HASH_POLICY_ENCAP2_3
BOND_XMIT_HASH_POLICY_ENCAP3_4
BOND_XMIT_HASH_POLICY_UNKNOWN
)
var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{
BOND_XMIT_HASH_POLICY_LAYER2: "layer2",
BOND_XMIT_HASH_POLICY_LAYER3_4: "layer3+4",
BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3",
BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3",
BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4",
}
var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{
"layer2": BOND_XMIT_HASH_POLICY_LAYER2,
"layer3+4": BOND_XMIT_HASH_POLICY_LAYER3_4,
"layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3,
"encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3,
"encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4,
}
// BondLacpRate type
type BondLacpRate int
func (b BondLacpRate) String() string {
s, ok := bondLacpRateToString[b]
if !ok {
return fmt.Sprintf("LacpRate(%d)", b)
}
return s
}
// StringToBondLacpRate returns bond lacp arte, or uknonw is the s is invalid.
func StringToBondLacpRate(s string) BondLacpRate {
lacp, ok := StringToBondLacpRateMap[s]
if !ok {
return BOND_LACP_RATE_UNKNOWN
}
return lacp
}
// Possible BondLacpRate value
const (
BOND_LACP_RATE_SLOW BondLacpRate = iota
BOND_LACP_RATE_FAST
BOND_LACP_RATE_UNKNOWN
)
var bondLacpRateToString = map[BondLacpRate]string{
BOND_LACP_RATE_SLOW: "slow",
BOND_LACP_RATE_FAST: "fast",
}
var StringToBondLacpRateMap = map[string]BondLacpRate{
"slow": BOND_LACP_RATE_SLOW,
"fast": BOND_LACP_RATE_FAST,
}
// BondAdSelect type
type BondAdSelect int
// Possible BondAdSelect value
const (
BOND_AD_SELECT_STABLE BondAdSelect = iota
BOND_AD_SELECT_BANDWIDTH
BOND_AD_SELECT_COUNT
)
// BondAdInfo represents ad info for bond
type BondAdInfo struct {
AggregatorId int
NumPorts int
ActorKey int
PartnerKey int
PartnerMac net.HardwareAddr
}
// Bond representation
type Bond struct {
LinkAttrs
Mode BondMode
ActiveSlave int
Miimon int
UpDelay int
DownDelay int
UseCarrier int
ArpInterval int
ArpIpTargets []net.IP
ArpValidate BondArpValidate
ArpAllTargets BondArpAllTargets
Primary int
PrimaryReselect BondPrimaryReselect
FailOverMac BondFailOverMac
XmitHashPolicy BondXmitHashPolicy
ResendIgmp int
NumPeerNotif int
AllSlavesActive int
MinLinks int
LpInterval int
PackersPerSlave int
LacpRate BondLacpRate
AdSelect BondAdSelect
// looking at iproute tool AdInfo can only be retrived. It can't be set.
AdInfo *BondAdInfo
AdActorSysPrio int
AdUserPortKey int
AdActorSystem net.HardwareAddr
TlbDynamicLb int
}
func NewLinkBond(atr LinkAttrs) *Bond {
return &Bond{
LinkAttrs: atr,
Mode: -1,
ActiveSlave: -1,
Miimon: -1,
UpDelay: -1,
DownDelay: -1,
UseCarrier: -1,
ArpInterval: -1,
ArpIpTargets: nil,
ArpValidate: -1,
ArpAllTargets: -1,
Primary: -1,
PrimaryReselect: -1,
FailOverMac: -1,
XmitHashPolicy: -1,
ResendIgmp: -1,
NumPeerNotif: -1,
AllSlavesActive: -1,
MinLinks: -1,
LpInterval: -1,
PackersPerSlave: -1,
LacpRate: -1,
AdSelect: -1,
AdActorSysPrio: -1,
AdUserPortKey: -1,
AdActorSystem: nil,
TlbDynamicLb: -1,
}
}
// Flag mask for bond options. Bond.Flagmask must be set to on for option to work.
const (
BOND_MODE_MASK uint64 = 1 << (1 + iota)
BOND_ACTIVE_SLAVE_MASK
BOND_MIIMON_MASK
BOND_UPDELAY_MASK
BOND_DOWNDELAY_MASK
BOND_USE_CARRIER_MASK
BOND_ARP_INTERVAL_MASK
BOND_ARP_VALIDATE_MASK
BOND_ARP_ALL_TARGETS_MASK
BOND_PRIMARY_MASK
BOND_PRIMARY_RESELECT_MASK
BOND_FAIL_OVER_MAC_MASK
BOND_XMIT_HASH_POLICY_MASK
BOND_RESEND_IGMP_MASK
BOND_NUM_PEER_NOTIF_MASK
BOND_ALL_SLAVES_ACTIVE_MASK
BOND_MIN_LINKS_MASK
BOND_LP_INTERVAL_MASK
BOND_PACKETS_PER_SLAVE_MASK
BOND_LACP_RATE_MASK
BOND_AD_SELECT_MASK
)
// Attrs implementation.
func (bond *Bond) Attrs() *LinkAttrs {
return &bond.LinkAttrs
}
// Type implementation fro Vxlan.
func (bond *Bond) Type() string {
return "bond"
}
// Gretap devices must specify LocalIP and RemoteIP on create
type Gretap struct {
LinkAttrs
IKey uint32
OKey uint32
EncapSport uint16
EncapDport uint16
Local net.IP
Remote net.IP
IFlags uint16
OFlags uint16
PMtuDisc uint8
Ttl uint8
Tos uint8
EncapType uint16
EncapFlags uint16
Link uint32
FlowBased bool
}
func (gretap *Gretap) Attrs() *LinkAttrs {
return &gretap.LinkAttrs
}
func (gretap *Gretap) Type() string {
return "gretap"
}
type Iptun struct {
LinkAttrs
Ttl uint8
Tos uint8
PMtuDisc uint8
Link uint32
Local net.IP
Remote net.IP
}
func (iptun *Iptun) Attrs() *LinkAttrs {
return &iptun.LinkAttrs
}
func (iptun *Iptun) Type() string {
return "ipip"
}
type Vti struct {
LinkAttrs
IKey uint32
OKey uint32
Link uint32
Local net.IP
Remote net.IP
}
func (vti *Vti) Attrs() *LinkAttrs {
return &vti.LinkAttrs
}
func (iptun *Vti) Type() string {
return "vti"
}
type Vrf struct {
LinkAttrs
Table uint32
}
func (vrf *Vrf) Attrs() *LinkAttrs {
return &vrf.LinkAttrs
}
func (vrf *Vrf) Type() string {
return "vrf"
}
type GTP struct {
LinkAttrs
FD0 int
FD1 int
Role int
PDPHashsize int
}
func (gtp *GTP) Attrs() *LinkAttrs {
return &gtp.LinkAttrs
}
func (gtp *GTP) Type() string {
return "gtp"
}
// iproute2 supported devices;
// vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
// gre | gretap | ip6gre | ip6gretap | vti | nlmon |
// bond_slave | ipvlan
// LinkNotFoundError wraps the various not found errors when
// getting/reading links. This is intended for better error
// handling by dependent code so that "not found error" can
// be distinguished from other errors
type LinkNotFoundError struct {
error
}

1819
vendor/github.com/vishvananda/netlink/link_linux.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,14 @@
package netlink
// ideally golang.org/x/sys/unix would define IfReq but it only has
// IFNAMSIZ, hence this minimalistic implementation
const (
SizeOfIfReq = 40
IFNAMSIZ = 16
)
type ifReq struct {
Name [IFNAMSIZ]byte
Flags uint16
pad [SizeOfIfReq - IFNAMSIZ - 2]byte
}

22
vendor/github.com/vishvananda/netlink/neigh.go generated vendored Normal file
View file

@ -0,0 +1,22 @@
package netlink
import (
"fmt"
"net"
)
// Neigh represents a link layer neighbor from netlink.
type Neigh struct {
LinkIndex int
Family int
State int
Type int
Flags int
IP net.IP
HardwareAddr net.HardwareAddr
}
// String returns $ip/$hwaddr $label
func (neigh *Neigh) String() string {
return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr)
}

250
vendor/github.com/vishvananda/netlink/neigh_linux.go generated vendored Normal file
View file

@ -0,0 +1,250 @@
package netlink
import (
"net"
"syscall"
"unsafe"
"github.com/vishvananda/netlink/nl"
)
const (
NDA_UNSPEC = iota
NDA_DST
NDA_LLADDR
NDA_CACHEINFO
NDA_PROBES
NDA_VLAN
NDA_PORT
NDA_VNI
NDA_IFINDEX
NDA_MAX = NDA_IFINDEX
)
// Neighbor Cache Entry States.
const (
NUD_NONE = 0x00
NUD_INCOMPLETE = 0x01
NUD_REACHABLE = 0x02
NUD_STALE = 0x04
NUD_DELAY = 0x08
NUD_PROBE = 0x10
NUD_FAILED = 0x20
NUD_NOARP = 0x40
NUD_PERMANENT = 0x80
)
// Neighbor Flags
const (
NTF_USE = 0x01
NTF_SELF = 0x02
NTF_MASTER = 0x04
NTF_PROXY = 0x08
NTF_ROUTER = 0x80
)
type Ndmsg struct {
Family uint8
Index uint32
State uint16
Flags uint8
Type uint8
}
func deserializeNdmsg(b []byte) *Ndmsg {
var dummy Ndmsg
return (*Ndmsg)(unsafe.Pointer(&b[0:unsafe.Sizeof(dummy)][0]))
}
func (msg *Ndmsg) Serialize() []byte {
return (*(*[unsafe.Sizeof(*msg)]byte)(unsafe.Pointer(msg)))[:]
}
func (msg *Ndmsg) Len() int {
return int(unsafe.Sizeof(*msg))
}
// NeighAdd will add an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh add ....`
func NeighAdd(neigh *Neigh) error {
return pkgHandle.NeighAdd(neigh)
}
// NeighAdd will add an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh add ....`
func (h *Handle) NeighAdd(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
}
// NeighSet will add or replace an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh replace....`
func NeighSet(neigh *Neigh) error {
return pkgHandle.NeighSet(neigh)
}
// NeighSet will add or replace an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh replace....`
func (h *Handle) NeighSet(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE)
}
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func NeighAppend(neigh *Neigh) error {
return pkgHandle.NeighAppend(neigh)
}
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func (h *Handle) NeighAppend(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND)
}
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func neighAdd(neigh *Neigh, mode int) error {
return pkgHandle.neighAdd(neigh, mode)
}
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func (h *Handle) neighAdd(neigh *Neigh, mode int) error {
req := h.newNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK)
return neighHandle(neigh, req)
}
// NeighDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func NeighDel(neigh *Neigh) error {
return pkgHandle.NeighDel(neigh)
}
// NeighDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func (h *Handle) NeighDel(neigh *Neigh) error {
req := h.newNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK)
return neighHandle(neigh, req)
}
func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
var family int
if neigh.Family > 0 {
family = neigh.Family
} else {
family = nl.GetIPFamily(neigh.IP)
}
msg := Ndmsg{
Family: uint8(family),
Index: uint32(neigh.LinkIndex),
State: uint16(neigh.State),
Type: uint8(neigh.Type),
Flags: uint8(neigh.Flags),
}
req.AddData(&msg)
ipData := neigh.IP.To4()
if ipData == nil {
ipData = neigh.IP.To16()
}
dstData := nl.NewRtAttr(NDA_DST, ipData)
req.AddData(dstData)
if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil {
hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr))
req.AddData(hwData)
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// NeighList gets a list of IP-MAC mappings in the system (ARP table).
// Equivalent to: `ip neighbor show`.
// The list can be filtered by link and ip family.
func NeighList(linkIndex, family int) ([]Neigh, error) {
return pkgHandle.NeighList(linkIndex, family)
}
// NeighProxyList gets a list of neighbor proxies in the system.
// Equivalent to: `ip neighbor show proxy`.
// The list can be filtered by link and ip family.
func NeighProxyList(linkIndex, family int) ([]Neigh, error) {
return pkgHandle.NeighProxyList(linkIndex, family)
}
// NeighList gets a list of IP-MAC mappings in the system (ARP table).
// Equivalent to: `ip neighbor show`.
// The list can be filtered by link and ip family.
func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
return h.neighList(linkIndex, family, 0)
}
// NeighProxyList gets a list of neighbor proxies in the system.
// Equivalent to: `ip neighbor show proxy`.
// The list can be filtered by link, ip family.
func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
return h.neighList(linkIndex, family, NTF_PROXY)
}
func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) {
req := h.newNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
msg := Ndmsg{
Family: uint8(family),
Index: uint32(linkIndex),
Flags: uint8(flags),
}
req.AddData(&msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWNEIGH)
if err != nil {
return nil, err
}
var res []Neigh
for _, m := range msgs {
ndm := deserializeNdmsg(m)
if linkIndex != 0 && int(ndm.Index) != linkIndex {
// Ignore messages from other interfaces
continue
}
neigh, err := NeighDeserialize(m)
if err != nil {
continue
}
res = append(res, *neigh)
}
return res, nil
}
func NeighDeserialize(m []byte) (*Neigh, error) {
msg := deserializeNdmsg(m)
neigh := Neigh{
LinkIndex: int(msg.Index),
Family: int(msg.Family),
State: int(msg.State),
Type: int(msg.Type),
Flags: int(msg.Flags),
}
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
for _, attr := range attrs {
switch attr.Attr.Type {
case NDA_DST:
neigh.IP = net.IP(attr.Value)
case NDA_LLADDR:
neigh.HardwareAddr = net.HardwareAddr(attr.Value)
}
}
return &neigh, nil
}

39
vendor/github.com/vishvananda/netlink/netlink.go generated vendored Normal file
View file

@ -0,0 +1,39 @@
// Package netlink provides a simple library for netlink. Netlink is
// the interface a user-space program in linux uses to communicate with
// the kernel. It can be used to add and remove interfaces, set up ip
// addresses and routes, and confiugre ipsec. Netlink communication
// requires elevated privileges, so in most cases this code needs to
// be run as root. The low level primitives for netlink are contained
// in the nl subpackage. This package attempts to provide a high-level
// interface that is loosly modeled on the iproute2 cli.
package netlink
import (
"errors"
"net"
)
var (
// ErrNotImplemented is returned when a requested feature is not implemented.
ErrNotImplemented = errors.New("not implemented")
)
// ParseIPNet parses a string in ip/net format and returns a net.IPNet.
// This is valuable because addresses in netlink are often IPNets and
// ParseCIDR returns an IPNet with the IP part set to the base IP of the
// range.
func ParseIPNet(s string) (*net.IPNet, error) {
ip, ipNet, err := net.ParseCIDR(s)
if err != nil {
return nil, err
}
return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil
}
// NewIPNet generates an IPNet from an ip address using a netmask of 32 or 128.
func NewIPNet(ip net.IP) *net.IPNet {
if ip.To4() != nil {
return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)}
}
return &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)}
}

11
vendor/github.com/vishvananda/netlink/netlink_linux.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
package netlink
import "github.com/vishvananda/netlink/nl"
// Family type definitions
const (
FAMILY_ALL = nl.FAMILY_ALL
FAMILY_V4 = nl.FAMILY_V4
FAMILY_V6 = nl.FAMILY_V6
FAMILY_MPLS = nl.FAMILY_MPLS
)

View file

@ -0,0 +1,221 @@
// +build !linux
package netlink
import "net"
func LinkSetUp(link Link) error {
return ErrNotImplemented
}
func LinkSetDown(link Link) error {
return ErrNotImplemented
}
func LinkSetMTU(link Link, mtu int) error {
return ErrNotImplemented
}
func LinkSetMaster(link Link, master *Bridge) error {
return ErrNotImplemented
}
func LinkSetNsPid(link Link, nspid int) error {
return ErrNotImplemented
}
func LinkSetNsFd(link Link, fd int) error {
return ErrNotImplemented
}
func LinkSetName(link Link, name string) error {
return ErrNotImplemented
}
func LinkSetAlias(link Link, name string) error {
return ErrNotImplemented
}
func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
return ErrNotImplemented
}
func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
return ErrNotImplemented
}
func LinkSetVfVlan(link Link, vf, vlan int) error {
return ErrNotImplemented
}
func LinkSetVfTxRate(link Link, vf, rate int) error {
return ErrNotImplemented
}
func LinkSetNoMaster(link Link) error {
return ErrNotImplemented
}
func LinkSetMasterByIndex(link Link, masterIndex int) error {
return ErrNotImplemented
}
func LinkSetXdpFd(link Link, fd int) error {
return ErrNotImplemented
}
func LinkSetARPOff(link Link) error {
return ErrNotImplemented
}
func LinkSetARPOn(link Link) error {
return ErrNotImplemented
}
func LinkByName(name string) (Link, error) {
return nil, ErrNotImplemented
}
func LinkByAlias(alias string) (Link, error) {
return nil, ErrNotImplemented
}
func LinkByIndex(index int) (Link, error) {
return nil, ErrNotImplemented
}
func LinkSetHairpin(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkSetGuard(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkSetFastLeave(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkSetLearning(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkSetRootBlock(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkSetFlood(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkAdd(link Link) error {
return ErrNotImplemented
}
func LinkDel(link Link) error {
return ErrNotImplemented
}
func SetHairpin(link Link, mode bool) error {
return ErrNotImplemented
}
func SetGuard(link Link, mode bool) error {
return ErrNotImplemented
}
func SetFastLeave(link Link, mode bool) error {
return ErrNotImplemented
}
func SetLearning(link Link, mode bool) error {
return ErrNotImplemented
}
func SetRootBlock(link Link, mode bool) error {
return ErrNotImplemented
}
func SetFlood(link Link, mode bool) error {
return ErrNotImplemented
}
func LinkList() ([]Link, error) {
return nil, ErrNotImplemented
}
func AddrAdd(link Link, addr *Addr) error {
return ErrNotImplemented
}
func AddrDel(link Link, addr *Addr) error {
return ErrNotImplemented
}
func AddrList(link Link, family int) ([]Addr, error) {
return nil, ErrNotImplemented
}
func RouteAdd(route *Route) error {
return ErrNotImplemented
}
func RouteDel(route *Route) error {
return ErrNotImplemented
}
func RouteList(link Link, family int) ([]Route, error) {
return nil, ErrNotImplemented
}
func XfrmPolicyAdd(policy *XfrmPolicy) error {
return ErrNotImplemented
}
func XfrmPolicyDel(policy *XfrmPolicy) error {
return ErrNotImplemented
}
func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
return nil, ErrNotImplemented
}
func XfrmStateAdd(policy *XfrmState) error {
return ErrNotImplemented
}
func XfrmStateDel(policy *XfrmState) error {
return ErrNotImplemented
}
func XfrmStateList(family int) ([]XfrmState, error) {
return nil, ErrNotImplemented
}
func NeighAdd(neigh *Neigh) error {
return ErrNotImplemented
}
func NeighSet(neigh *Neigh) error {
return ErrNotImplemented
}
func NeighAppend(neigh *Neigh) error {
return ErrNotImplemented
}
func NeighDel(neigh *Neigh) error {
return ErrNotImplemented
}
func NeighList(linkIndex, family int) ([]Neigh, error) {
return nil, ErrNotImplemented
}
func NeighDeserialize(m []byte) (*Neigh, error) {
return nil, ErrNotImplemented
}
func SocketGet(local, remote net.Addr) (*Socket, error) {
return nil, ErrNotImplemented
}

76
vendor/github.com/vishvananda/netlink/nl/addr_linux.go generated vendored Normal file
View file

@ -0,0 +1,76 @@
package nl
import (
"syscall"
"unsafe"
)
type IfAddrmsg struct {
syscall.IfAddrmsg
}
func NewIfAddrmsg(family int) *IfAddrmsg {
return &IfAddrmsg{
IfAddrmsg: syscall.IfAddrmsg{
Family: uint8(family),
},
}
}
// struct ifaddrmsg {
// __u8 ifa_family;
// __u8 ifa_prefixlen; /* The prefix length */
// __u8 ifa_flags; /* Flags */
// __u8 ifa_scope; /* Address scope */
// __u32 ifa_index; /* Link index */
// };
// type IfAddrmsg struct {
// Family uint8
// Prefixlen uint8
// Flags uint8
// Scope uint8
// Index uint32
// }
// SizeofIfAddrmsg = 0x8
func DeserializeIfAddrmsg(b []byte) *IfAddrmsg {
return (*IfAddrmsg)(unsafe.Pointer(&b[0:syscall.SizeofIfAddrmsg][0]))
}
func (msg *IfAddrmsg) Serialize() []byte {
return (*(*[syscall.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:]
}
func (msg *IfAddrmsg) Len() int {
return syscall.SizeofIfAddrmsg
}
// struct ifa_cacheinfo {
// __u32 ifa_prefered;
// __u32 ifa_valid;
// __u32 cstamp; /* created timestamp, hundredths of seconds */
// __u32 tstamp; /* updated timestamp, hundredths of seconds */
// };
const IFA_CACHEINFO = 6
const SizeofIfaCacheInfo = 0x10
type IfaCacheInfo struct {
IfaPrefered uint32
IfaValid uint32
Cstamp uint32
Tstamp uint32
}
func (msg *IfaCacheInfo) Len() int {
return SizeofIfaCacheInfo
}
func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo {
return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0]))
}
func (msg *IfaCacheInfo) Serialize() []byte {
return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:]
}

View file

@ -0,0 +1,74 @@
package nl
import (
"fmt"
"unsafe"
)
const (
SizeofBridgeVlanInfo = 0x04
)
/* Bridge Flags */
const (
BRIDGE_FLAGS_MASTER = iota /* Bridge command to/from master */
BRIDGE_FLAGS_SELF /* Bridge command to/from lowerdev */
)
/* Bridge management nested attributes
* [IFLA_AF_SPEC] = {
* [IFLA_BRIDGE_FLAGS]
* [IFLA_BRIDGE_MODE]
* [IFLA_BRIDGE_VLAN_INFO]
* }
*/
const (
IFLA_BRIDGE_FLAGS = iota
IFLA_BRIDGE_MODE
IFLA_BRIDGE_VLAN_INFO
)
const (
BRIDGE_VLAN_INFO_MASTER = 1 << iota
BRIDGE_VLAN_INFO_PVID
BRIDGE_VLAN_INFO_UNTAGGED
BRIDGE_VLAN_INFO_RANGE_BEGIN
BRIDGE_VLAN_INFO_RANGE_END
)
// struct bridge_vlan_info {
// __u16 flags;
// __u16 vid;
// };
type BridgeVlanInfo struct {
Flags uint16
Vid uint16
}
func (b *BridgeVlanInfo) Serialize() []byte {
return (*(*[SizeofBridgeVlanInfo]byte)(unsafe.Pointer(b)))[:]
}
func DeserializeBridgeVlanInfo(b []byte) *BridgeVlanInfo {
return (*BridgeVlanInfo)(unsafe.Pointer(&b[0:SizeofBridgeVlanInfo][0]))
}
func (b *BridgeVlanInfo) PortVID() bool {
return b.Flags&BRIDGE_VLAN_INFO_PVID > 0
}
func (b *BridgeVlanInfo) EngressUntag() bool {
return b.Flags&BRIDGE_VLAN_INFO_UNTAGGED > 0
}
func (b *BridgeVlanInfo) String() string {
return fmt.Sprintf("%+v", *b)
}
/* New extended info filters for IFLA_EXT_MASK */
const (
RTEXT_FILTER_VF = 1 << iota
RTEXT_FILTER_BRVLAN
RTEXT_FILTER_BRVLAN_COMPRESSED
)

View file

@ -0,0 +1,189 @@
package nl
import "unsafe"
// Track the message sizes for the correct serialization/deserialization
const (
SizeofNfgenmsg = 4
SizeofNfattr = 4
SizeofNfConntrack = 376
SizeofNfctTupleHead = 52
)
var L4ProtoMap = map[uint8]string{
6: "tcp",
17: "udp",
}
// All the following constants are coming from:
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
// enum cntl_msg_types {
// IPCTNL_MSG_CT_NEW,
// IPCTNL_MSG_CT_GET,
// IPCTNL_MSG_CT_DELETE,
// IPCTNL_MSG_CT_GET_CTRZERO,
// IPCTNL_MSG_CT_GET_STATS_CPU,
// IPCTNL_MSG_CT_GET_STATS,
// IPCTNL_MSG_CT_GET_DYING,
// IPCTNL_MSG_CT_GET_UNCONFIRMED,
//
// IPCTNL_MSG_MAX
// };
const (
IPCTNL_MSG_CT_GET = 1
IPCTNL_MSG_CT_DELETE = 2
)
// #define NFNETLINK_V0 0
const (
NFNETLINK_V0 = 0
)
// #define NLA_F_NESTED (1 << 15)
const (
NLA_F_NESTED = (1 << 15)
)
// enum ctattr_type {
// CTA_UNSPEC,
// CTA_TUPLE_ORIG,
// CTA_TUPLE_REPLY,
// CTA_STATUS,
// CTA_PROTOINFO,
// CTA_HELP,
// CTA_NAT_SRC,
// #define CTA_NAT CTA_NAT_SRC /* backwards compatibility */
// CTA_TIMEOUT,
// CTA_MARK,
// CTA_COUNTERS_ORIG,
// CTA_COUNTERS_REPLY,
// CTA_USE,
// CTA_ID,
// CTA_NAT_DST,
// CTA_TUPLE_MASTER,
// CTA_SEQ_ADJ_ORIG,
// CTA_NAT_SEQ_ADJ_ORIG = CTA_SEQ_ADJ_ORIG,
// CTA_SEQ_ADJ_REPLY,
// CTA_NAT_SEQ_ADJ_REPLY = CTA_SEQ_ADJ_REPLY,
// CTA_SECMARK, /* obsolete */
// CTA_ZONE,
// CTA_SECCTX,
// CTA_TIMESTAMP,
// CTA_MARK_MASK,
// CTA_LABELS,
// CTA_LABELS_MASK,
// __CTA_MAX
// };
const (
CTA_TUPLE_ORIG = 1
CTA_TUPLE_REPLY = 2
CTA_STATUS = 3
CTA_TIMEOUT = 7
CTA_MARK = 8
CTA_PROTOINFO = 4
)
// enum ctattr_tuple {
// CTA_TUPLE_UNSPEC,
// CTA_TUPLE_IP,
// CTA_TUPLE_PROTO,
// CTA_TUPLE_ZONE,
// __CTA_TUPLE_MAX
// };
// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
const (
CTA_TUPLE_IP = 1
CTA_TUPLE_PROTO = 2
)
// enum ctattr_ip {
// CTA_IP_UNSPEC,
// CTA_IP_V4_SRC,
// CTA_IP_V4_DST,
// CTA_IP_V6_SRC,
// CTA_IP_V6_DST,
// __CTA_IP_MAX
// };
// #define CTA_IP_MAX (__CTA_IP_MAX - 1)
const (
CTA_IP_V4_SRC = 1
CTA_IP_V4_DST = 2
CTA_IP_V6_SRC = 3
CTA_IP_V6_DST = 4
)
// enum ctattr_l4proto {
// CTA_PROTO_UNSPEC,
// CTA_PROTO_NUM,
// CTA_PROTO_SRC_PORT,
// CTA_PROTO_DST_PORT,
// CTA_PROTO_ICMP_ID,
// CTA_PROTO_ICMP_TYPE,
// CTA_PROTO_ICMP_CODE,
// CTA_PROTO_ICMPV6_ID,
// CTA_PROTO_ICMPV6_TYPE,
// CTA_PROTO_ICMPV6_CODE,
// __CTA_PROTO_MAX
// };
// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
const (
CTA_PROTO_NUM = 1
CTA_PROTO_SRC_PORT = 2
CTA_PROTO_DST_PORT = 3
)
// enum ctattr_protoinfo {
// CTA_PROTOINFO_UNSPEC,
// CTA_PROTOINFO_TCP,
// CTA_PROTOINFO_DCCP,
// CTA_PROTOINFO_SCTP,
// __CTA_PROTOINFO_MAX
// };
// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
const (
CTA_PROTOINFO_TCP = 1
)
// enum ctattr_protoinfo_tcp {
// CTA_PROTOINFO_TCP_UNSPEC,
// CTA_PROTOINFO_TCP_STATE,
// CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
// CTA_PROTOINFO_TCP_WSCALE_REPLY,
// CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
// CTA_PROTOINFO_TCP_FLAGS_REPLY,
// __CTA_PROTOINFO_TCP_MAX
// };
// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
const (
CTA_PROTOINFO_TCP_STATE = 1
CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2
CTA_PROTOINFO_TCP_WSCALE_REPLY = 3
CTA_PROTOINFO_TCP_FLAGS_ORIGINAL = 4
CTA_PROTOINFO_TCP_FLAGS_REPLY = 5
)
// /* General form of address family dependent message.
// */
// struct nfgenmsg {
// __u8 nfgen_family; /* AF_xxx */
// __u8 version; /* nfnetlink version */
// __be16 res_id; /* resource id */
// };
type Nfgenmsg struct {
NfgenFamily uint8
Version uint8
ResId uint16 // big endian
}
func (msg *Nfgenmsg) Len() int {
return SizeofNfgenmsg
}
func DeserializeNfgenmsg(b []byte) *Nfgenmsg {
return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0]))
}
func (msg *Nfgenmsg) Serialize() []byte {
return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:]
}

View file

@ -0,0 +1,89 @@
package nl
import (
"unsafe"
)
const SizeofGenlmsg = 4
const (
GENL_ID_CTRL = 0x10
GENL_CTRL_VERSION = 2
GENL_CTRL_NAME = "nlctrl"
)
const (
GENL_CTRL_CMD_GETFAMILY = 3
)
const (
GENL_CTRL_ATTR_UNSPEC = iota
GENL_CTRL_ATTR_FAMILY_ID
GENL_CTRL_ATTR_FAMILY_NAME
GENL_CTRL_ATTR_VERSION
GENL_CTRL_ATTR_HDRSIZE
GENL_CTRL_ATTR_MAXATTR
GENL_CTRL_ATTR_OPS
GENL_CTRL_ATTR_MCAST_GROUPS
)
const (
GENL_CTRL_ATTR_OP_UNSPEC = iota
GENL_CTRL_ATTR_OP_ID
GENL_CTRL_ATTR_OP_FLAGS
)
const (
GENL_ADMIN_PERM = 1 << iota
GENL_CMD_CAP_DO
GENL_CMD_CAP_DUMP
GENL_CMD_CAP_HASPOL
)
const (
GENL_CTRL_ATTR_MCAST_GRP_UNSPEC = iota
GENL_CTRL_ATTR_MCAST_GRP_NAME
GENL_CTRL_ATTR_MCAST_GRP_ID
)
const (
GENL_GTP_VERSION = 0
GENL_GTP_NAME = "gtp"
)
const (
GENL_GTP_CMD_NEWPDP = iota
GENL_GTP_CMD_DELPDP
GENL_GTP_CMD_GETPDP
)
const (
GENL_GTP_ATTR_UNSPEC = iota
GENL_GTP_ATTR_LINK
GENL_GTP_ATTR_VERSION
GENL_GTP_ATTR_TID
GENL_GTP_ATTR_PEER_ADDRESS
GENL_GTP_ATTR_MS_ADDRESS
GENL_GTP_ATTR_FLOW
GENL_GTP_ATTR_NET_NS_FD
GENL_GTP_ATTR_I_TEI
GENL_GTP_ATTR_O_TEI
GENL_GTP_ATTR_PAD
)
type Genlmsg struct {
Command uint8
Version uint8
}
func (msg *Genlmsg) Len() int {
return SizeofGenlmsg
}
func DeserializeGenlmsg(b []byte) *Genlmsg {
return (*Genlmsg)(unsafe.Pointer(&b[0:SizeofGenlmsg][0]))
}
func (msg *Genlmsg) Serialize() []byte {
return (*(*[SizeofGenlmsg]byte)(unsafe.Pointer(msg)))[:]
}

Some files were not shown because too many files have changed in this diff Show more