From a0e15abf5e60a125ef373b55847cdc22e77b42b9 Mon Sep 17 00:00:00 2001 From: Andrew Pilloud Date: Thu, 15 Jun 2017 15:03:43 -0700 Subject: [PATCH] vendor: add hostport and deps Signed-off-by: Andrew Pilloud --- vendor.conf | 1 + vendor/github.com/renstrom/dedent/LICENSE | 21 + vendor/github.com/renstrom/dedent/README.md | 50 + vendor/github.com/renstrom/dedent/dedent.go | 56 + .../apiserver/pkg/features/kube_features.go | 47 + .../pkg/util/feature/feature_gate.go | 211 +++ .../kubernetes/pkg/api/service/annotations.go | 99 ++ .../k8s.io/kubernetes/pkg/api/service/util.go | 68 + .../kubernetes/pkg/features/kube_features.go | 114 ++ .../kubelet/network/hostport/fake_iptables.go | 346 ++++ .../pkg/kubelet/network/hostport/hostport.go | 171 ++ .../network/hostport/hostport_manager.go | 328 ++++ .../network/hostport/hostport_syncer.go | 305 ++++ vendor/k8s.io/kubernetes/pkg/proxy/doc.go | 18 + .../kubernetes/pkg/proxy/healthcheck/doc.go | 18 + .../pkg/proxy/healthcheck/healthcheck.go | 235 +++ .../kubernetes/pkg/proxy/iptables/proxier.go | 1390 +++++++++++++++++ vendor/k8s.io/kubernetes/pkg/proxy/types.go | 49 + .../kubernetes/pkg/proxy/util/conntrack.go | 58 + .../k8s.io/kubernetes/pkg/util/dbus/dbus.go | 133 ++ vendor/k8s.io/kubernetes/pkg/util/dbus/doc.go | 18 + .../kubernetes/pkg/util/dbus/fake_dbus.go | 135 ++ .../kubernetes/pkg/util/iptables/doc.go | 18 + .../kubernetes/pkg/util/iptables/iptables.go | 581 +++++++ .../pkg/util/iptables/save_restore.go | 108 ++ .../kubernetes/pkg/util/net/sets/doc.go | 28 + .../kubernetes/pkg/util/net/sets/ipnet.go | 119 ++ .../kubernetes/pkg/util/sysctl/sysctl.go | 73 + .../k8s.io/kubernetes/pkg/util/version/doc.go | 18 + .../kubernetes/pkg/util/version/version.go | 236 +++ 30 files changed, 5052 insertions(+) create mode 100644 vendor/github.com/renstrom/dedent/LICENSE create mode 100644 vendor/github.com/renstrom/dedent/README.md create mode 100644 vendor/github.com/renstrom/dedent/dedent.go create mode 100644 
vendor/k8s.io/apiserver/pkg/features/kube_features.go create mode 100644 vendor/k8s.io/apiserver/pkg/util/feature/feature_gate.go create mode 100644 vendor/k8s.io/kubernetes/pkg/api/service/annotations.go create mode 100644 vendor/k8s.io/kubernetes/pkg/api/service/util.go create mode 100644 vendor/k8s.io/kubernetes/pkg/features/kube_features.go create mode 100644 vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/fake_iptables.go create mode 100644 vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport.go create mode 100644 vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_manager.go create mode 100644 vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_syncer.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/healthcheck.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/iptables/proxier.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/types.go create mode 100644 vendor/k8s.io/kubernetes/pkg/proxy/util/conntrack.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/dbus/dbus.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/dbus/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/dbus/fake_dbus.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/iptables/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/iptables/iptables.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/iptables/save_restore.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/net/sets/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/net/sets/ipnet.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/sysctl/sysctl.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/version/doc.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/version/version.go diff --git a/vendor.conf b/vendor.conf index 5f9695f4..0332a126 100644 --- a/vendor.conf +++ 
b/vendor.conf @@ -69,3 +69,4 @@ github.com/pkg/errors v0.8.0 github.com/godbus/dbus v4.0.0 github.com/urfave/cli v1.19.1 github.com/vbatts/tar-split v0.10.1 +github.com/renstrom/dedent v1.0.0 diff --git a/vendor/github.com/renstrom/dedent/LICENSE b/vendor/github.com/renstrom/dedent/LICENSE new file mode 100644 index 00000000..66a9870f --- /dev/null +++ b/vendor/github.com/renstrom/dedent/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Peter Renström + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/github.com/renstrom/dedent/README.md b/vendor/github.com/renstrom/dedent/README.md new file mode 100644 index 00000000..35b5aa13 --- /dev/null +++ b/vendor/github.com/renstrom/dedent/README.md @@ -0,0 +1,50 @@ +# Dedent + +[![Build Status](https://travis-ci.org/renstrom/dedent.svg?branch=master)](https://travis-ci.org/renstrom/dedent) +[![Godoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/renstrom/dedent) + +Removes common leading whitespace from multiline strings. Inspired by [`textwrap.dedent`](https://docs.python.org/3/library/textwrap.html#textwrap.dedent) in Python. + +## Usage / example + +Imagine the following snippet that prints a multiline string. You want the indentation to both look nice in the code as well as in the actual output. + +```go +package main + +import ( + "fmt" + + "github.com/renstrom/dedent" +) + +func main() { + s := `Lorem ipsum dolor sit amet, + consectetur adipiscing elit. + Curabitur justo tellus, facilisis nec efficitur dictum, + fermentum vitae ligula. Sed eu convallis sapien.` + fmt.Println(dedent.Dedent(s)) + fmt.Println("-------------") + fmt.Println(s) +} +``` + +To illustrate the difference, here's the output: + + +```bash +$ go run main.go +Lorem ipsum dolor sit amet, +consectetur adipiscing elit. +Curabitur justo tellus, facilisis nec efficitur dictum, +fermentum vitae ligula. Sed eu convallis sapien. +------------- +Lorem ipsum dolor sit amet, + consectetur adipiscing elit. + Curabitur justo tellus, facilisis nec efficitur dictum, + fermentum vitae ligula. Sed eu convallis sapien. 
+``` + +## License + +MIT diff --git a/vendor/github.com/renstrom/dedent/dedent.go b/vendor/github.com/renstrom/dedent/dedent.go new file mode 100644 index 00000000..f58dc47d --- /dev/null +++ b/vendor/github.com/renstrom/dedent/dedent.go @@ -0,0 +1,56 @@ +package dedent + +import ( + "regexp" + "strings" +) + +var whitespaceOnly = regexp.MustCompile("(?m)^[ \t]+$") +var leadingWhitespace = regexp.MustCompile("(?m)(^[ \t]*)") + +// Dedent removes any common leading whitespace from every line in s. +// +// This can be used to make multiline strings to line up with the left edge of +// the display, while still presenting them in the source code in indented +// form. +func Dedent(s string) string { + s = whitespaceOnly.ReplaceAllString(s, "") + margin := findMargin(s) + if len(margin) == 0 { + return s + } + return regexp.MustCompile("(?m)^"+margin).ReplaceAllString(s, "") +} + +// Look for the longest leading string of spaces and tabs common to all lines. +func findMargin(s string) string { + var margin string + + indents := leadingWhitespace.FindAllString(s, -1) + numIndents := len(indents) + for i, indent := range indents { + // Don't use last row if it is empty + if i == numIndents-1 && indent == "" { + break + } + + if margin == "" { + margin = indent + } else if strings.HasPrefix(indent, margin) { + // Current line more deeply indented than previous winner: + // no change (previous winner is still on top). + continue + } else if strings.HasPrefix(margin, indent) { + // Current line consistent with and no deeper than previous winner: + // it's the new winner. + margin = indent + } else { + // Current line and previous winner have no common whitespace: + // there is no margin. 
+ margin = "" + break + } + } + + return margin +} diff --git a/vendor/k8s.io/apiserver/pkg/features/kube_features.go b/vendor/k8s.io/apiserver/pkg/features/kube_features.go new file mode 100644 index 00000000..8ab10fa2 --- /dev/null +++ b/vendor/k8s.io/apiserver/pkg/features/kube_features.go @@ -0,0 +1,47 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + utilfeature "k8s.io/apiserver/pkg/util/feature" +) + +const ( + // Every feature gate should add method here following this template: + // + // // owner: @username + // // alpha: v1.4 + // MyFeature() bool + + // owner: timstclair + // alpha: v1.5 + // + // StreamingProxyRedirects controls whether the apiserver should intercept (and follow) + // redirects from the backend (Kubelet) for streaming requests (exec/attach/port-forward). + StreamingProxyRedirects utilfeature.Feature = "StreamingProxyRedirects" +) + +func init() { + utilfeature.DefaultFeatureGate.Add(defaultKubernetesFeatureGates) +} + +// defaultKubernetesFeatureGates consists of all known Kubernetes-specific feature keys. +// To add a new feature, define a key for it above and add it here. The features will be +// available throughout Kubernetes binaries. 
+var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureSpec{ + StreamingProxyRedirects: {Default: true, PreRelease: utilfeature.Beta}, +} diff --git a/vendor/k8s.io/apiserver/pkg/util/feature/feature_gate.go b/vendor/k8s.io/apiserver/pkg/util/feature/feature_gate.go new file mode 100644 index 00000000..e7226688 --- /dev/null +++ b/vendor/k8s.io/apiserver/pkg/util/feature/feature_gate.go @@ -0,0 +1,211 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package feature + +import ( + "fmt" + "sort" + "strconv" + "strings" + + "github.com/golang/glog" + "github.com/spf13/pflag" +) + +type Feature string + +const ( + flagName = "feature-gates" + + // allAlphaGate is a global toggle for alpha features. Per-feature key + // values override the default set by allAlphaGate. Examples: + // AllAlpha=false,NewFeature=true will result in newFeature=true + // AllAlpha=true,NewFeature=false will result in newFeature=false + allAlphaGate Feature = "AllAlpha" +) + +var ( + // The generic features. + defaultFeatures = map[Feature]FeatureSpec{ + allAlphaGate: {Default: false, PreRelease: Alpha}, + } + + // Special handling for a few gates. + specialFeatures = map[Feature]func(f *featureGate, val bool){ + allAlphaGate: setUnsetAlphaGates, + } + + // DefaultFeatureGate is a shared global FeatureGate. 
+ DefaultFeatureGate FeatureGate = NewFeatureGate() +) + +type FeatureSpec struct { + Default bool + PreRelease prerelease +} + +type prerelease string + +const ( + // Values for PreRelease. + Alpha = prerelease("ALPHA") + Beta = prerelease("BETA") + GA = prerelease("") +) + +// FeatureGate parses and stores flag gates for known features from +// a string like feature1=true,feature2=false,... +type FeatureGate interface { + AddFlag(fs *pflag.FlagSet) + Set(value string) error + Enabled(key Feature) bool + Add(features map[Feature]FeatureSpec) error + KnownFeatures() []string +} + +// featureGate implements FeatureGate as well as pflag.Value for flag parsing. +type featureGate struct { + known map[Feature]FeatureSpec + special map[Feature]func(*featureGate, bool) + enabled map[Feature]bool + + // is set to true when AddFlag is called. Note: initialization is not go-routine safe, lookup is + closed bool +} + +func setUnsetAlphaGates(f *featureGate, val bool) { + for k, v := range f.known { + if v.PreRelease == Alpha { + if _, found := f.enabled[k]; !found { + f.enabled[k] = val + } + } + } +} + +// Set, String, and Type implement pflag.Value +var _ pflag.Value = &featureGate{} + +func NewFeatureGate() *featureGate { + f := &featureGate{ + known: map[Feature]FeatureSpec{}, + special: specialFeatures, + enabled: map[Feature]bool{}, + } + for k, v := range defaultFeatures { + f.known[k] = v + } + return f +} + +// Set Parses a string of the form // "key1=value1,key2=value2,..." into a +// map[string]bool of known keys or returns an error. 
+func (f *featureGate) Set(value string) error { + for _, s := range strings.Split(value, ",") { + if len(s) == 0 { + continue + } + arr := strings.SplitN(s, "=", 2) + k := Feature(strings.TrimSpace(arr[0])) + _, ok := f.known[Feature(k)] + if !ok { + return fmt.Errorf("unrecognized key: %s", k) + } + if len(arr) != 2 { + return fmt.Errorf("missing bool value for %s", k) + } + v := strings.TrimSpace(arr[1]) + boolValue, err := strconv.ParseBool(v) + if err != nil { + return fmt.Errorf("invalid value of %s: %s, err: %v", k, v, err) + } + f.enabled[k] = boolValue + + // Handle "special" features like "all alpha gates" + if fn, found := f.special[k]; found { + fn(f, boolValue) + } + } + + glog.Infof("feature gates: %v", f.enabled) + return nil +} + +func (f *featureGate) String() string { + pairs := []string{} + for k, v := range f.enabled { + pairs = append(pairs, fmt.Sprintf("%s=%t", k, v)) + } + sort.Strings(pairs) + return strings.Join(pairs, ",") +} + +func (f *featureGate) Type() string { + return "mapStringBool" +} + +func (f *featureGate) Add(features map[Feature]FeatureSpec) error { + if f.closed { + return fmt.Errorf("cannot add a feature gate after adding it to the flag set") + } + + for name, spec := range features { + if existingSpec, found := f.known[name]; found { + if existingSpec == spec { + continue + } + return fmt.Errorf("feature gate %q with different spec already exists: %v", name, existingSpec) + } + + f.known[name] = spec + } + return nil +} + +func (f *featureGate) Enabled(key Feature) bool { + defaultValue := f.known[key].Default + if f.enabled != nil { + if v, ok := f.enabled[key]; ok { + return v + } + } + return defaultValue +} + +// AddFlag adds a flag for setting global feature gates to the specified FlagSet. +func (f *featureGate) AddFlag(fs *pflag.FlagSet) { + f.closed = true + + known := f.KnownFeatures() + fs.Var(f, flagName, ""+ + "A set of key=value pairs that describe feature gates for alpha/experimental features. 
"+ + "Options are:\n"+strings.Join(known, "\n")) +} + +// Returns a string describing the FeatureGate's known features. +func (f *featureGate) KnownFeatures() []string { + var known []string + for k, v := range f.known { + pre := "" + if v.PreRelease != GA { + pre = fmt.Sprintf("%s - ", v.PreRelease) + } + known = append(known, fmt.Sprintf("%s=true|false (%sdefault=%t)", k, pre, v.Default)) + } + sort.Strings(known) + return known +} diff --git a/vendor/k8s.io/kubernetes/pkg/api/service/annotations.go b/vendor/k8s.io/kubernetes/pkg/api/service/annotations.go new file mode 100644 index 00000000..42fd68b7 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/api/service/annotations.go @@ -0,0 +1,99 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "strconv" + + "github.com/golang/glog" + "k8s.io/kubernetes/pkg/api" +) + +const ( + // AnnotationLoadBalancerSourceRangesKey is the key of the annotation on a service to set allowed ingress ranges on their LoadBalancers + // + // It should be a comma-separated list of CIDRs, e.g. `0.0.0.0/0` to + // allow full access (the default) or `18.0.0.0/8,56.0.0.0/8` to allow + // access only from the CIDRs currently allocated to MIT & the USPS. + // + // Not all cloud providers support this annotation, though AWS & GCE do. 
+ AnnotationLoadBalancerSourceRangesKey = "service.beta.kubernetes.io/load-balancer-source-ranges" + + // AnnotationValueExternalTrafficLocal Value of annotation to specify local endpoints behaviour + AnnotationValueExternalTrafficLocal = "OnlyLocal" + // AnnotationValueExternalTrafficGlobal Value of annotation to specify global (legacy) behaviour + AnnotationValueExternalTrafficGlobal = "Global" + + // TODO: The alpha annotations have been deprecated, remove them when we move this feature to GA. + + // AlphaAnnotationHealthCheckNodePort Annotation specifying the healthcheck nodePort for the service + // If not specified, annotation is created by the service api backend with the allocated nodePort + // Will use user-specified nodePort value if specified by the client + AlphaAnnotationHealthCheckNodePort = "service.alpha.kubernetes.io/healthcheck-nodeport" + + // AlphaAnnotationExternalTraffic An annotation that denotes if this Service desires to route external traffic to local + // endpoints only. This preserves Source IP and avoids a second hop. + AlphaAnnotationExternalTraffic = "service.alpha.kubernetes.io/external-traffic" + + // BetaAnnotationHealthCheckNodePort is the beta version of AlphaAnnotationHealthCheckNodePort. + BetaAnnotationHealthCheckNodePort = "service.beta.kubernetes.io/healthcheck-nodeport" + + // BetaAnnotationExternalTraffic is the beta version of AlphaAnnotationExternalTraffic. + BetaAnnotationExternalTraffic = "service.beta.kubernetes.io/external-traffic" +) + +// NeedsHealthCheck Check service for health check annotations +func NeedsHealthCheck(service *api.Service) bool { + // First check the alpha annotation and then the beta. This is so existing + // Services continue to work till the user decides to transition to beta. + // If they transition to beta, there's no way to go back to alpha without + // rolling back the cluster. 
+ for _, annotation := range []string{AlphaAnnotationExternalTraffic, BetaAnnotationExternalTraffic} { + if l, ok := service.Annotations[annotation]; ok { + if l == AnnotationValueExternalTrafficLocal { + return true + } else if l == AnnotationValueExternalTrafficGlobal { + return false + } else { + glog.Errorf("Invalid value for annotation %v: %v", annotation, l) + } + } + } + return false +} + +// GetServiceHealthCheckNodePort Return health check node port annotation for service, if one exists +func GetServiceHealthCheckNodePort(service *api.Service) int32 { + if !NeedsHealthCheck(service) { + return 0 + } + // First check the alpha annotation and then the beta. This is so existing + // Services continue to work till the user decides to transition to beta. + // If they transition to beta, there's no way to go back to alpha without + // rolling back the cluster. + for _, annotation := range []string{AlphaAnnotationHealthCheckNodePort, BetaAnnotationHealthCheckNodePort} { + if l, ok := service.Annotations[annotation]; ok { + p, err := strconv.Atoi(l) + if err != nil { + glog.Errorf("Failed to parse annotation %v: %v", annotation, err) + continue + } + return int32(p) + } + } + return 0 +} diff --git a/vendor/k8s.io/kubernetes/pkg/api/service/util.go b/vendor/k8s.io/kubernetes/pkg/api/service/util.go new file mode 100644 index 00000000..6f0e14e2 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/api/service/util.go @@ -0,0 +1,68 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "fmt" + "strings" + + "k8s.io/kubernetes/pkg/api" + netsets "k8s.io/kubernetes/pkg/util/net/sets" +) + +const ( + defaultLoadBalancerSourceRanges = "0.0.0.0/0" +) + +// IsAllowAll checks whether the netsets.IPNet allows traffic from 0.0.0.0/0 +func IsAllowAll(ipnets netsets.IPNet) bool { + for _, s := range ipnets.StringSlice() { + if s == "0.0.0.0/0" { + return true + } + } + return false +} + +// GetLoadBalancerSourceRanges first try to parse and verify LoadBalancerSourceRanges field from a service. +// If the field is not specified, turn to parse and verify the AnnotationLoadBalancerSourceRangesKey annotation from a service, +// extracting the source ranges to allow, and if not present returns a default (allow-all) value. +func GetLoadBalancerSourceRanges(service *api.Service) (netsets.IPNet, error) { + var ipnets netsets.IPNet + var err error + // if SourceRange field is specified, ignore sourceRange annotation + if len(service.Spec.LoadBalancerSourceRanges) > 0 { + specs := service.Spec.LoadBalancerSourceRanges + ipnets, err = netsets.ParseIPNets(specs...) + + if err != nil { + return nil, fmt.Errorf("service.Spec.LoadBalancerSourceRanges: %v is not valid. Expecting a list of IP ranges. For example, 10.0.0.0/24. Error msg: %v", specs, err) + } + } else { + val := service.Annotations[AnnotationLoadBalancerSourceRangesKey] + val = strings.TrimSpace(val) + if val == "" { + val = defaultLoadBalancerSourceRanges + } + specs := strings.Split(val, ",") + ipnets, err = netsets.ParseIPNets(specs...) + if err != nil { + return nil, fmt.Errorf("%s: %s is not valid. Expecting a comma-separated list of source IP ranges. 
For example, 10.0.0.0/24,192.168.2.0/24", AnnotationLoadBalancerSourceRangesKey, val) + } + } + return ipnets, nil +} diff --git a/vendor/k8s.io/kubernetes/pkg/features/kube_features.go b/vendor/k8s.io/kubernetes/pkg/features/kube_features.go new file mode 100644 index 00000000..2827af44 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/features/kube_features.go @@ -0,0 +1,114 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package features + +import ( + genericfeatures "k8s.io/apiserver/pkg/features" + utilfeature "k8s.io/apiserver/pkg/util/feature" +) + +const ( + // Every feature gate should add method here following this template: + // + // // owner: @username + // // alpha: v1.4 + // MyFeature() bool + + // owner: @timstclair + // beta: v1.4 + AppArmor utilfeature.Feature = "AppArmor" + + // owner: @girishkalele + // alpha: v1.4 + ExternalTrafficLocalOnly utilfeature.Feature = "AllowExtTrafficLocalEndpoints" + + // owner: @saad-ali + // alpha: v1.3 + DynamicVolumeProvisioning utilfeature.Feature = "DynamicVolumeProvisioning" + + // owner: @mtaufen + // alpha: v1.4 + DynamicKubeletConfig utilfeature.Feature = "DynamicKubeletConfig" + + // owner: timstclair + // alpha: v1.5 + // + // StreamingProxyRedirects controls whether the apiserver should intercept (and follow) + // redirects from the backend (Kubelet) for streaming requests (exec/attach/port-forward). 
+ StreamingProxyRedirects utilfeature.Feature = genericfeatures.StreamingProxyRedirects + + // owner: @pweil- + // alpha: v1.5 + // + // Default userns=host for containers that are using other host namespaces, host mounts, the pod + // contains a privileged container, or specific non-namespaced capabilities (MKNOD, SYS_MODULE, + // SYS_TIME). This should only be enabled if user namespace remapping is enabled in the docker daemon. + ExperimentalHostUserNamespaceDefaultingGate utilfeature.Feature = "ExperimentalHostUserNamespaceDefaulting" + + // owner: @vishh + // alpha: v1.5 + // + // Ensures guaranteed scheduling of pods marked with a special pod annotation `scheduler.alpha.kubernetes.io/critical-pod` + // and also prevents them from being evicted from a node. + // Note: This feature is not supported for `BestEffort` pods. + ExperimentalCriticalPodAnnotation utilfeature.Feature = "ExperimentalCriticalPodAnnotation" + + // owner: @davidopp + // alpha: v1.6 + // + // Determines if affinity defined in annotations should be processed + // TODO: remove when alpha support for affinity is removed + AffinityInAnnotations utilfeature.Feature = "AffinityInAnnotations" + + // owner: @vishh + // alpha: v1.6 + // + // Enables support for GPUs as a schedulable resource. + // Only Nvidia GPUs are supported as of v1.6. + // Works only with Docker Container Runtime. + Accelerators utilfeature.Feature = "Accelerators" + + // owner: @gmarek + // alpha: v1.6 + // + // Changes the logic behind evicting Pods from not ready Nodes + // to take advantage of NoExecute Taints and Tolerations. + TaintBasedEvictions utilfeature.Feature = "TaintBasedEvictions" +) + +func init() { + utilfeature.DefaultFeatureGate.Add(defaultKubernetesFeatureGates) +} + +// defaultKubernetesFeatureGates consists of all known Kubernetes-specific feature keys. +// To add a new feature, define a key for it above and add it here. The features will be +// available throughout Kubernetes binaries. 
+var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureSpec{ + ExternalTrafficLocalOnly: {Default: true, PreRelease: utilfeature.Beta}, + AppArmor: {Default: true, PreRelease: utilfeature.Beta}, + DynamicKubeletConfig: {Default: false, PreRelease: utilfeature.Alpha}, + DynamicVolumeProvisioning: {Default: true, PreRelease: utilfeature.Alpha}, + ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: utilfeature.Beta}, + ExperimentalCriticalPodAnnotation: {Default: false, PreRelease: utilfeature.Alpha}, + AffinityInAnnotations: {Default: false, PreRelease: utilfeature.Alpha}, + Accelerators: {Default: false, PreRelease: utilfeature.Alpha}, + TaintBasedEvictions: {Default: false, PreRelease: utilfeature.Alpha}, + + // inherited features from generic apiserver, relisted here to get a conflict if it is changed + // unintentionally on either side: + StreamingProxyRedirects: {Default: true, PreRelease: utilfeature.Beta}, +} diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/fake_iptables.go b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/fake_iptables.go new file mode 100644 index 00000000..d8c05bad --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/fake_iptables.go @@ -0,0 +1,346 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hostport + +import ( + "bytes" + "fmt" + "net" + "strings" + + utiliptables "k8s.io/kubernetes/pkg/util/iptables" +) + +type fakeChain struct { + name utiliptables.Chain + rules []string +} + +type fakeTable struct { + name utiliptables.Table + chains map[string]*fakeChain +} + +type fakeIPTables struct { + tables map[string]*fakeTable +} + +func NewFakeIPTables() *fakeIPTables { + return &fakeIPTables{ + tables: make(map[string]*fakeTable, 0), + } +} + +func (f *fakeIPTables) GetVersion() (string, error) { + return "1.4.21", nil +} + +func (f *fakeIPTables) getTable(tableName utiliptables.Table) (*fakeTable, error) { + table, ok := f.tables[string(tableName)] + if !ok { + return nil, fmt.Errorf("Table %s does not exist", tableName) + } + return table, nil +} + +func (f *fakeIPTables) getChain(tableName utiliptables.Table, chainName utiliptables.Chain) (*fakeTable, *fakeChain, error) { + table, err := f.getTable(tableName) + if err != nil { + return nil, nil, err + } + + chain, ok := table.chains[string(chainName)] + if !ok { + return table, nil, fmt.Errorf("Chain %s/%s does not exist", tableName, chainName) + } + + return table, chain, nil +} + +func (f *fakeIPTables) ensureChain(tableName utiliptables.Table, chainName utiliptables.Chain) (bool, *fakeChain) { + table, chain, err := f.getChain(tableName, chainName) + if err != nil { + // either table or table+chain don't exist yet + if table == nil { + table = &fakeTable{ + name: tableName, + chains: make(map[string]*fakeChain), + } + f.tables[string(tableName)] = table + } + chain := &fakeChain{ + name: chainName, + rules: make([]string, 0), + } + table.chains[string(chainName)] = chain + return false, chain + } + return true, chain +} + +func (f *fakeIPTables) EnsureChain(tableName utiliptables.Table, chainName utiliptables.Chain) (bool, error) { + existed, _ := f.ensureChain(tableName, chainName) + return existed, nil +} + +func (f *fakeIPTables) FlushChain(tableName utiliptables.Table, chainName 
utiliptables.Chain) error { + _, chain, err := f.getChain(tableName, chainName) + if err != nil { + return err + } + chain.rules = make([]string, 0) + return nil +} + +func (f *fakeIPTables) DeleteChain(tableName utiliptables.Table, chainName utiliptables.Chain) error { + table, _, err := f.getChain(tableName, chainName) + if err != nil { + return err + } + delete(table.chains, string(chainName)) + return nil +} + +// Returns index of rule in array; < 0 if rule is not found +func findRule(chain *fakeChain, rule string) int { + for i, candidate := range chain.rules { + if rule == candidate { + return i + } + } + return -1 +} + +func (f *fakeIPTables) ensureRule(position utiliptables.RulePosition, tableName utiliptables.Table, chainName utiliptables.Chain, rule string) (bool, error) { + _, chain, err := f.getChain(tableName, chainName) + if err != nil { + _, chain = f.ensureChain(tableName, chainName) + } + + rule, err = normalizeRule(rule) + if err != nil { + return false, err + } + ruleIdx := findRule(chain, rule) + if ruleIdx >= 0 { + return true, nil + } + + if position == utiliptables.Prepend { + chain.rules = append([]string{rule}, chain.rules...) 
+ } else if position == utiliptables.Append { + chain.rules = append(chain.rules, rule) + } else { + return false, fmt.Errorf("Unknown position argument %q", position) + } + + return false, nil +} + +func normalizeRule(rule string) (string, error) { + normalized := "" + remaining := strings.TrimSpace(rule) + for { + var end int + + if strings.HasPrefix(remaining, "--to-destination=") { + remaining = strings.Replace(remaining, "=", " ", 1) + } + + if remaining[0] == '"' { + end = strings.Index(remaining[1:], "\"") + if end < 0 { + return "", fmt.Errorf("Invalid rule syntax: mismatched quotes") + } + end += 2 + } else { + end = strings.Index(remaining, " ") + if end < 0 { + end = len(remaining) + } + } + arg := remaining[:end] + + // Normalize un-prefixed IP addresses like iptables does + if net.ParseIP(arg) != nil { + arg = arg + "/32" + } + + if len(normalized) > 0 { + normalized += " " + } + normalized += strings.TrimSpace(arg) + if len(remaining) == end { + break + } + remaining = remaining[end+1:] + } + return normalized, nil +} + +func (f *fakeIPTables) EnsureRule(position utiliptables.RulePosition, tableName utiliptables.Table, chainName utiliptables.Chain, args ...string) (bool, error) { + ruleArgs := make([]string, 0) + for _, arg := range args { + // quote args with internal spaces (like comments) + if strings.Index(arg, " ") >= 0 { + arg = fmt.Sprintf("\"%s\"", arg) + } + ruleArgs = append(ruleArgs, arg) + } + return f.ensureRule(position, tableName, chainName, strings.Join(ruleArgs, " ")) +} + +func (f *fakeIPTables) DeleteRule(tableName utiliptables.Table, chainName utiliptables.Chain, args ...string) error { + _, chain, err := f.getChain(tableName, chainName) + if err == nil { + rule := strings.Join(args, " ") + ruleIdx := findRule(chain, rule) + if ruleIdx < 0 { + return nil + } + chain.rules = append(chain.rules[:ruleIdx], chain.rules[ruleIdx+1:]...) 
+ } + return nil +} + +func (f *fakeIPTables) IsIpv6() bool { + return false +} + +func saveChain(chain *fakeChain, data *bytes.Buffer) { + for _, rule := range chain.rules { + data.WriteString(fmt.Sprintf("-A %s %s\n", chain.name, rule)) + } +} + +func (f *fakeIPTables) Save(tableName utiliptables.Table) ([]byte, error) { + table, err := f.getTable(tableName) + if err != nil { + return nil, err + } + + data := bytes.NewBuffer(nil) + data.WriteString(fmt.Sprintf("*%s\n", table.name)) + + rules := bytes.NewBuffer(nil) + for _, chain := range table.chains { + data.WriteString(fmt.Sprintf(":%s - [0:0]\n", string(chain.name))) + saveChain(chain, rules) + } + data.Write(rules.Bytes()) + data.WriteString("COMMIT\n") + return data.Bytes(), nil +} + +func (f *fakeIPTables) SaveAll() ([]byte, error) { + data := bytes.NewBuffer(nil) + for _, table := range f.tables { + tableData, err := f.Save(table.name) + if err != nil { + return nil, err + } + if _, err = data.Write(tableData); err != nil { + return nil, err + } + } + return data.Bytes(), nil +} + +func (f *fakeIPTables) restore(restoreTableName utiliptables.Table, data []byte, flush utiliptables.FlushFlag) error { + buf := bytes.NewBuffer(data) + var tableName utiliptables.Table + for { + line, err := buf.ReadString('\n') + if err != nil { + break + } + if line[0] == '#' { + continue + } + + line = strings.TrimSuffix(line, "\n") + if strings.HasPrefix(line, "*") { + tableName = utiliptables.Table(line[1:]) + } + if tableName != "" { + if restoreTableName != "" && restoreTableName != tableName { + continue + } + if strings.HasPrefix(line, ":") { + chainName := utiliptables.Chain(strings.Split(line[1:], " ")[0]) + if flush == utiliptables.FlushTables { + table, chain, _ := f.getChain(tableName, chainName) + if chain != nil { + delete(table.chains, string(chainName)) + } + } + _, _ = f.ensureChain(tableName, chainName) + } else if strings.HasPrefix(line, "-A") { + parts := strings.Split(line, " ") + if len(parts) < 3 { + 
return fmt.Errorf("Invalid iptables rule '%s'", line) + } + chainName := utiliptables.Chain(parts[1]) + rule := strings.TrimPrefix(line, fmt.Sprintf("-A %s ", chainName)) + _, err := f.ensureRule(utiliptables.Append, tableName, chainName, rule) + if err != nil { + return err + } + } else if strings.HasPrefix(line, "-I") { + parts := strings.Split(line, " ") + if len(parts) < 3 { + return fmt.Errorf("Invalid iptables rule '%s'", line) + } + chainName := utiliptables.Chain(parts[1]) + rule := strings.TrimPrefix(line, fmt.Sprintf("-I %s ", chainName)) + _, err := f.ensureRule(utiliptables.Prepend, tableName, chainName, rule) + if err != nil { + return err + } + } else if strings.HasPrefix(line, "-X") { + parts := strings.Split(line, " ") + if len(parts) < 2 { + return fmt.Errorf("Invalid iptables rule '%s'", line) + } + if err := f.DeleteChain(tableName, utiliptables.Chain(parts[1])); err != nil { + return err + } + } else if line == "COMMIT" { + if restoreTableName == tableName { + return nil + } + tableName = "" + } + } + } + + return nil +} + +func (f *fakeIPTables) Restore(tableName utiliptables.Table, data []byte, flush utiliptables.FlushFlag, counters utiliptables.RestoreCountersFlag) error { + return f.restore(tableName, data, flush) +} + +func (f *fakeIPTables) RestoreAll(data []byte, flush utiliptables.FlushFlag, counters utiliptables.RestoreCountersFlag) error { + return f.restore("", data, flush) +} + +func (f *fakeIPTables) AddReloadFunc(reloadFunc func()) { +} + +func (f *fakeIPTables) Destroy() { +} diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport.go b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport.go new file mode 100644 index 00000000..374df8eb --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport.go @@ -0,0 +1,171 @@ +/* +Copyright 2017 The Kubernetes Authors. 

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package hostport

import (
	"fmt"
	"github.com/golang/glog"
	"net"
	"strings"

	"k8s.io/kubernetes/pkg/api/v1"
	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
)

const (
	// the hostport chain
	kubeHostportsChain utiliptables.Chain = "KUBE-HOSTPORTS"
	// prefix for hostport chains
	kubeHostportChainPrefix string = "KUBE-HP-"
)

// PortMapping represents a network port in a container
type PortMapping struct {
	Name          string
	HostPort      int32
	ContainerPort int32
	Protocol      v1.Protocol
	HostIP        string
}

// PodPortMapping represents a pod's network state and associated container port mappings
type PodPortMapping struct {
	Namespace    string
	Name         string
	PortMappings []*PortMapping
	HostNetwork  bool
	IP           net.IP
}

// hostport identifies a host port by number and lower-case protocol name; it
// is used as a map key for the sockets held open on that port.
type hostport struct {
	port     int32
	protocol string
}

// hostportOpener opens (reserves) a host port and returns a handle that
// releases it when closed.
type hostportOpener func(*hostport) (closeable, error)

// closeable is anything that can be closed; here, a held listener or UDP
// socket.
type closeable interface {
	Close() error
}

// openLocalPort binds a listening socket on the given host port ("tcp" or
// "udp") and returns it so the caller can keep it open. Any other protocol
// is an error.
func openLocalPort(hp *hostport) (closeable, error) {
	// For ports on node IPs, open the actual port and hold it, even though we
	// use iptables to redirect traffic.
	// This ensures a) that it's safe to use that port and b) that (a) stays
	// true. The risk is that some process on the node (e.g. sshd or kubelet)
	// is using a port and we give that same port out to a Service. That would
	// be bad because iptables would silently claim the traffic but the process
	// would never know.
	// NOTE: We should not need to have a real listen()ing socket - bind()
	// should be enough, but I can't figure out a way to e2e test without
	// it. Tools like 'ss' and 'netstat' do not show sockets that are
	// bind()ed but not listen()ed, and at least the default debian netcat
	// has no way to avoid about 10 seconds of retries.
	var socket closeable
	switch hp.protocol {
	case "tcp":
		listener, err := net.Listen("tcp", fmt.Sprintf(":%d", hp.port))
		if err != nil {
			return nil, err
		}
		socket = listener
	case "udp":
		addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf(":%d", hp.port))
		if err != nil {
			return nil, err
		}
		conn, err := net.ListenUDP("udp", addr)
		if err != nil {
			return nil, err
		}
		socket = conn
	default:
		return nil, fmt.Errorf("unknown protocol %q", hp.protocol)
	}
	glog.V(3).Infof("Opened local port %s", hp.String())
	return socket, nil
}

// openHostports opens all given hostports using the given hostportOpener
// If encounter any error, clean up and return the error
// If all ports are opened successfully, return the hostport and socket mapping
// TODO: move openHostports and closeHostports into a common struct
func openHostports(portOpener hostportOpener, podPortMapping *PodPortMapping) (map[hostport]closeable, error) {
	var retErr error
	ports := make(map[hostport]closeable)
	for _, pm := range podPortMapping.PortMappings {
		// Mappings without a positive HostPort do not request a host port.
		if pm.HostPort <= 0 {
			continue
		}
		hp := portMappingToHostport(pm)
		socket, err := portOpener(&hp)
		if err != nil {
			// Stop at the first failure; ports opened so far are closed below.
			retErr = fmt.Errorf("cannot open hostport %d for pod %s: %v", pm.HostPort, getPodFullName(podPortMapping), err)
			break
		}
		ports[hp] = socket
	}

	// If encounter any error, close all hostports that just got opened.
	if retErr != nil {
		for hp, socket := range ports {
			if err := socket.Close(); err != nil {
				glog.Errorf("Cannot clean up hostport %d for pod %s: %v", hp.port, getPodFullName(podPortMapping), err)
			}
		}
		return nil, retErr
	}
	return ports, nil
}

// portMappingToHostport creates hostport structure based on input portmapping
func portMappingToHostport(portMapping *PortMapping) hostport {
	return hostport{
		port:     portMapping.HostPort,
		protocol: strings.ToLower(string(portMapping.Protocol)),
	}
}

// ensureKubeHostportChains ensures the KUBE-HOSTPORTS chain is setup correctly
//
// It creates the chain in the nat table, prepends a jump to it from the nat
// OUTPUT and PREROUTING chains for locally-destined traffic, and appends a
// MASQUERADE rule to nat POSTROUTING for 127.0.0.0/8 traffic leaving through
// natInterfaceName. The first failing step is returned as an error.
func ensureKubeHostportChains(iptables utiliptables.Interface, natInterfaceName string) error {
	glog.V(4).Info("Ensuring kubelet hostport chains")
	// Ensure kubeHostportChain
	if _, err := iptables.EnsureChain(utiliptables.TableNAT, kubeHostportsChain); err != nil {
		return fmt.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubeHostportsChain, err)
	}
	tableChainsNeedJumpServices := []struct {
		table utiliptables.Table
		chain utiliptables.Chain
	}{
		{utiliptables.TableNAT, utiliptables.ChainOutput},
		{utiliptables.TableNAT, utiliptables.ChainPrerouting},
	}
	args := []string{"-m", "comment", "--comment", "kube hostport portals",
		"-m", "addrtype", "--dst-type", "LOCAL",
		"-j", string(kubeHostportsChain)}
	for _, tc := range tableChainsNeedJumpServices {
		if _, err := iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
			return fmt.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeHostportsChain, err)
		}
	}
	// Need to SNAT traffic from localhost
	args = []string{"-m", "comment", "--comment", "SNAT for localhost access to hostports", "-o", natInterfaceName, "-s", "127.0.0.0/8", "-j", "MASQUERADE"}
	if _, err := iptables.EnsureRule(utiliptables.Append, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
		return fmt.Errorf("Failed to ensure that %s chain %s jumps to MASQUERADE: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, err)
	}
	return nil
}
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_manager.go
new file mode 100644
index 00000000..5c18f8fc
--- /dev/null
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_manager.go
@@ -0,0 +1,328 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package hostport

import (
	"bytes"
	"crypto/sha256"
	"encoding/base32"
	"fmt"
	"strings"
	"sync"

	"github.com/golang/glog"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	iptablesproxy "k8s.io/kubernetes/pkg/proxy/iptables"
	utildbus "k8s.io/kubernetes/pkg/util/dbus"
	utilexec "k8s.io/kubernetes/pkg/util/exec"
	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
)

// HostPortManager is an interface for adding and removing hostport for a given pod sandbox.
type HostPortManager interface {
	// Add implements port mappings.
	// id should be a unique identifier for a pod, e.g. podSandboxID.
	// podPortMapping is the associated port mapping information for the pod.
	// natInterfaceName is the interface that localhost used to talk to the given pod.
	Add(id string, podPortMapping *PodPortMapping, natInterfaceName string) error
	// Remove cleans up matching port mappings
	// Remove must be able to clean up port mappings without pod IP
	Remove(id string, podPortMapping *PodPortMapping) error
}

// hostportManager implements HostPortManager. hostPortMap tracks the sockets
// held open per host port; mu is taken by Add and Remove around their port
// and iptables work.
type hostportManager struct {
	hostPortMap map[hostport]closeable
	iptables    utiliptables.Interface
	portOpener  hostportOpener
	mu          sync.Mutex
}

// NewHostportManager returns a HostPortManager backed by an IPv4 iptables
// interface and openLocalPort as the port opener.
func NewHostportManager() HostPortManager {
	iptInterface := utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4)
	return &hostportManager{
		hostPortMap: make(map[hostport]closeable),
		iptables:    iptInterface,
		portOpener:  openLocalPort,
	}
}

// Add opens the pod's host ports and installs, via one iptables-restore
// call, a per-port chain with SNAT-mark and DNAT rules plus a jump from
// KUBE-HOSTPORTS. It is a no-op for a nil mapping, a host-network pod, or a
// pod with no host ports, and fails if the pod has no IPv4 address. If the
// iptables steps fail after ports were opened, those ports are closed again
// and both errors are returned as an aggregate.
func (hm *hostportManager) Add(id string, podPortMapping *PodPortMapping, natInterfaceName string) (err error) {
	if podPortMapping == nil || podPortMapping.HostNetwork {
		return nil
	}
	podFullName := getPodFullName(podPortMapping)

	// skip if there is no hostport needed
	hostportMappings := gatherHostportMappings(podPortMapping)
	if len(hostportMappings) == 0 {
		return nil
	}

	if podPortMapping.IP.To4() == nil {
		return fmt.Errorf("invalid or missing IP of pod %s", podFullName)
	}
	podIP := podPortMapping.IP.String()

	// NOTE(review): this runs before hm.mu is taken — confirm that is intended.
	if err = ensureKubeHostportChains(hm.iptables, natInterfaceName); err != nil {
		return err
	}

	// Ensure atomicity for port opening and iptables operations
	hm.mu.Lock()
	defer hm.mu.Unlock()

	// try to open hostports
	ports, err := openHostports(hm.portOpener, podPortMapping)
	if err != nil {
		return err
	}
	// Record the sockets immediately so closeHostports can find them if a
	// later step fails.
	for hostport, socket := range ports {
		hm.hostPortMap[hostport] = socket
	}

	// Chain declarations and rules are buffered separately and concatenated
	// at the end, so all chain lines precede all rule lines.
	natChains := bytes.NewBuffer(nil)
	natRules := bytes.NewBuffer(nil)
	writeLine(natChains, "*nat")

	existingChains, existingRules, err := getExistingHostportIPTablesRules(hm.iptables)
	if err != nil {
		// clean up opened host port if encounter any error
		return utilerrors.NewAggregate([]error{err, hm.closeHostports(hostportMappings)})
	}

	newChains := []utiliptables.Chain{}
	for _, pm := range hostportMappings {
		protocol := strings.ToLower(string(pm.Protocol))
		chain := getHostportChain(id, pm)
		newChains = append(newChains, chain)

		// Add new hostport chain
		writeLine(natChains, utiliptables.MakeChainLine(chain))

		// Prepend the new chain to KUBE-HOSTPORTS
		// This avoids any leaking iptables rule that takes up the same port
		writeLine(natRules, "-I", string(kubeHostportsChain),
			"-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, podFullName, pm.HostPort),
			"-m", protocol, "-p", protocol, "--dport", fmt.Sprintf("%d", pm.HostPort),
			"-j", string(chain),
		)

		// SNAT if the traffic comes from the pod itself
		writeLine(natRules, "-A", string(chain),
			"-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, podFullName, pm.HostPort),
			"-s", podIP,
			"-j", string(iptablesproxy.KubeMarkMasqChain))

		// DNAT to the podIP:containerPort
		writeLine(natRules, "-A", string(chain),
			"-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, podFullName, pm.HostPort),
			"-m", protocol, "-p", protocol,
			"-j", "DNAT", fmt.Sprintf("--to-destination=%s:%d", podIP, pm.ContainerPort))
	}

	// getHostportChain should be able to provide unique hostport chain name using hash
	// if there is a chain conflict or multiple Adds have been triggered for a single pod,
	// filtering should be able to avoid further problem
	filterChains(existingChains, newChains)
	existingRules = filterRules(existingRules, newChains)

	for _, chain := range existingChains {
		writeLine(natChains, chain)
	}
	for _, rule := range existingRules {
		writeLine(natRules, rule)
	}
	writeLine(natRules, "COMMIT")

	if err = hm.syncIPTables(append(natChains.Bytes(), natRules.Bytes()...)); err != nil {
		// clean up opened host port if encounter any error
		return utilerrors.NewAggregate([]error{err, hm.closeHostports(hostportMappings)})
	}
	return nil
}

// Remove deletes the pod's hostport chains and the rules that mention them
// through one iptables-restore call, then closes the pod's held host ports.
// It is a no-op for a nil mapping, a host-network pod, or a pod with no host
// ports. The pod IP is not consulted.
func (hm *hostportManager) Remove(id string, podPortMapping *PodPortMapping) (err error) {
	if podPortMapping == nil || podPortMapping.HostNetwork {
		return nil
	}

	hostportMappings := gatherHostportMappings(podPortMapping)
	if len(hostportMappings) <= 0 {
		return nil
	}

	// Ensure atomicity for port closing and iptables operations
	hm.mu.Lock()
	defer hm.mu.Unlock()

	var existingChains map[utiliptables.Chain]string
	var existingRules []string
	existingChains, existingRules, err = getExistingHostportIPTablesRules(hm.iptables)
	if err != nil {
		return err
	}

	// Gather target hostport chains for removal
	chainsToRemove := []utiliptables.Chain{}
	for _, pm := range hostportMappings {
		chainsToRemove = append(chainsToRemove, getHostportChain(id, pm))

		// To preserve backward compatibility for k8s 1.5 or earlier.
		// Need to remove hostport chains added by hostportSyncer if there is any
		// TODO: remove this in 1.7
		chainsToRemove = append(chainsToRemove, hostportChainName(pm, getPodFullName(podPortMapping)))
	}

	// remove rules that consists of target chains
	remainingRules := filterRules(existingRules, chainsToRemove)

	// gather target hostport chains that exists in iptables-save result
	existingChainsToRemove := []utiliptables.Chain{}
	for _, chain := range chainsToRemove {
		if _, ok := existingChains[chain]; ok {
			existingChainsToRemove = append(existingChainsToRemove, chain)
		}
	}

	natChains := bytes.NewBuffer(nil)
	natRules := bytes.NewBuffer(nil)
	writeLine(natChains, "*nat")
	// Every existing hostport chain is declared, including the ones removed
	// with -X below.
	for _, chain := range existingChains {
		writeLine(natChains, chain)
	}
	for _, rule := range remainingRules {
		writeLine(natRules, rule)
	}
	for _, chain := range existingChainsToRemove {
		writeLine(natRules, "-X", string(chain))
	}
	writeLine(natRules, "COMMIT")

	if err = hm.syncIPTables(append(natChains.Bytes(), natRules.Bytes()...)); err != nil {
		return err
	}

	// clean up opened pod host ports
	return hm.closeHostports(hostportMappings)
}

// syncIPTables
executes iptables-restore with given lines +func (hm *hostportManager) syncIPTables(lines []byte) error { + glog.V(3).Infof("Restoring iptables rules: %s", lines) + err := hm.iptables.RestoreAll(lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) + if err != nil { + return fmt.Errorf("Failed to execute iptables-restore: %v", err) + } + return nil +} + +// closeHostports tries to close all the listed host ports +// TODO: move closeHostports and openHostports into a common struct +func (hm *hostportManager) closeHostports(hostportMappings []*PortMapping) error { + errList := []error{} + for _, pm := range hostportMappings { + hp := portMappingToHostport(pm) + if socket, ok := hm.hostPortMap[hp]; ok { + glog.V(2).Infof("Closing host port %s", hp.String()) + if err := socket.Close(); err != nil { + errList = append(errList, fmt.Errorf("failed to close host port %s: %v", hp.String(), err)) + continue + } + delete(hm.hostPortMap, hp) + } + } + return utilerrors.NewAggregate(errList) +} + +// getHostportChain takes id, hostport and protocol for a pod and returns associated iptables chain. +// This is computed by hashing (sha256) then encoding to base32 and truncating with the prefix +// "KUBE-HP-". We do this because IPTables Chain Names must be <= 28 chars long, and the longer +// they are the harder they are to read. +// WARNING: Please do not change this function. Otherwise, HostportManager may not be able to +// identify existing iptables chains. 
+func getHostportChain(id string, pm *PortMapping) utiliptables.Chain { + hash := sha256.Sum256([]byte(id + string(pm.HostPort) + string(pm.Protocol))) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return utiliptables.Chain(kubeHostportChainPrefix + encoded[:16]) +} + +// gatherHostportMappings returns all the PortMappings which has hostport for a pod +func gatherHostportMappings(podPortMapping *PodPortMapping) []*PortMapping { + mappings := []*PortMapping{} + for _, pm := range podPortMapping.PortMappings { + if pm.HostPort <= 0 { + continue + } + mappings = append(mappings, pm) + } + return mappings +} + +// getExistingHostportIPTablesRules retrieves raw data from iptables-save, parse it, +// return all the hostport related chains and rules +func getExistingHostportIPTablesRules(iptables utiliptables.Interface) (map[utiliptables.Chain]string, []string, error) { + iptablesSaveRaw, err := iptables.Save(utiliptables.TableNAT) + if err != nil { // if we failed to get any rules + return nil, nil, fmt.Errorf("failed to execute iptables-save: %v", err) + } + existingNATChains := utiliptables.GetChainLines(utiliptables.TableNAT, iptablesSaveRaw) + + existingHostportChains := make(map[utiliptables.Chain]string) + existingHostportRules := []string{} + + for chain := range existingNATChains { + if strings.HasPrefix(string(chain), string(kubeHostportsChain)) || strings.HasPrefix(string(chain), kubeHostportChainPrefix) { + existingHostportChains[chain] = existingNATChains[chain] + } + } + + for _, line := range strings.Split(string(iptablesSaveRaw), "\n") { + if strings.HasPrefix(line, fmt.Sprintf("-A %s", kubeHostportChainPrefix)) || + strings.HasPrefix(line, fmt.Sprintf("-A %s", string(kubeHostportsChain))) { + existingHostportRules = append(existingHostportRules, line) + } + } + return existingHostportChains, existingHostportRules, nil +} + +// filterRules filters input rules with input chains. Rules that did not involve any filter chain will be returned. 
+// The order of the input rules is important and is preserved. +func filterRules(rules []string, filters []utiliptables.Chain) []string { + filtered := []string{} + for _, rule := range rules { + skip := false + for _, filter := range filters { + if strings.Contains(rule, string(filter)) { + skip = true + break + } + } + if !skip { + filtered = append(filtered, rule) + } + } + return filtered +} + +// filterChains deletes all entries of filter chains from chain map +func filterChains(chains map[utiliptables.Chain]string, filterChains []utiliptables.Chain) { + for _, chain := range filterChains { + if _, ok := chains[chain]; ok { + delete(chains, chain) + } + } +} diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_syncer.go b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_syncer.go new file mode 100644 index 00000000..c72c9e16 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/network/hostport/hostport_syncer.go @@ -0,0 +1,305 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hostport + +import ( + "bytes" + "crypto/sha256" + "encoding/base32" + "fmt" + "strings" + "time" + + "github.com/golang/glog" + + iptablesproxy "k8s.io/kubernetes/pkg/proxy/iptables" + utildbus "k8s.io/kubernetes/pkg/util/dbus" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" +) + +// HostportSyncer takes a list of PodPortMappings and implements hostport all at once +type HostportSyncer interface { + // SyncHostports gathers all hostports on node and setup iptables rules to enable them. + // On each invocation existing ports are synced and stale rules are deleted. + SyncHostports(natInterfaceName string, activePodPortMappings []*PodPortMapping) error + // OpenPodHostportsAndSync opens hostports for a new PodPortMapping, gathers all hostports on + // node, sets up iptables rules enable them. On each invocation existing ports are synced and stale rules are deleted. + // 'newPortMapping' must also be present in 'activePodPortMappings'. 
+ OpenPodHostportsAndSync(newPortMapping *PodPortMapping, natInterfaceName string, activePodPortMappings []*PodPortMapping) error +} + +type hostportSyncer struct { + hostPortMap map[hostport]closeable + iptables utiliptables.Interface + portOpener hostportOpener +} + +func NewHostportSyncer() HostportSyncer { + iptInterface := utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4) + return &hostportSyncer{ + hostPortMap: make(map[hostport]closeable), + iptables: iptInterface, + portOpener: openLocalPort, + } +} + +type targetPod struct { + podFullName string + podIP string +} + +func (hp *hostport) String() string { + return fmt.Sprintf("%s:%d", hp.protocol, hp.port) +} + +//openPodHostports opens all hostport for pod and returns the map of hostport and socket +func (h *hostportSyncer) openHostports(podHostportMapping *PodPortMapping) error { + var retErr error + ports := make(map[hostport]closeable) + for _, port := range podHostportMapping.PortMappings { + if port.HostPort <= 0 { + // Assume hostport is not specified in this portmapping. So skip + continue + } + hp := hostport{ + port: port.HostPort, + protocol: strings.ToLower(string(port.Protocol)), + } + socket, err := h.portOpener(&hp) + if err != nil { + retErr = fmt.Errorf("cannot open hostport %d for pod %s: %v", port.HostPort, getPodFullName(podHostportMapping), err) + break + } + ports[hp] = socket + } + + // If encounter any error, close all hostports that just got opened. 
+ if retErr != nil { + for hp, socket := range ports { + if err := socket.Close(); err != nil { + glog.Errorf("Cannot clean up hostport %d for pod %s: %v", hp.port, getPodFullName(podHostportMapping), err) + } + } + return retErr + } + + for hostPort, socket := range ports { + h.hostPortMap[hostPort] = socket + } + + return nil +} + +func getPodFullName(pod *PodPortMapping) string { + // Use underscore as the delimiter because it is not allowed in pod name + // (DNS subdomain format), while allowed in the container name format. + return pod.Name + "_" + pod.Namespace +} + +// gatherAllHostports returns all hostports that should be presented on node, +// given the list of pods running on that node and ignoring host network +// pods (which don't need hostport <-> container port mapping). +func gatherAllHostports(activePodPortMappings []*PodPortMapping) (map[*PortMapping]targetPod, error) { + podHostportMap := make(map[*PortMapping]targetPod) + for _, pm := range activePodPortMappings { + if pm.IP.To4() == nil { + return nil, fmt.Errorf("Invalid or missing pod %s IP", getPodFullName(pm)) + } + // should not handle hostports for hostnetwork pods + if pm.HostNetwork { + continue + } + + for _, port := range pm.PortMappings { + if port.HostPort != 0 { + podHostportMap[port] = targetPod{podFullName: getPodFullName(pm), podIP: pm.IP.String()} + } + } + } + return podHostportMap, nil +} + +// Join all words with spaces, terminate with newline and write to buf. +func writeLine(buf *bytes.Buffer, words ...string) { + buf.WriteString(strings.Join(words, " ") + "\n") +} + +//hostportChainName takes containerPort for a pod and returns associated iptables chain. +// This is computed by hashing (sha256) +// then encoding to base32 and truncating with the prefix "KUBE-SVC-". We do +// this because IPTables Chain Names must be <= 28 chars long, and the longer +// they are the harder they are to read. 
+func hostportChainName(pm *PortMapping, podFullName string) utiliptables.Chain { + hash := sha256.Sum256([]byte(string(pm.HostPort) + string(pm.Protocol) + podFullName)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return utiliptables.Chain(kubeHostportChainPrefix + encoded[:16]) +} + +// OpenPodHostportsAndSync opens hostports for a new PodPortMapping, gathers all hostports on +// node, sets up iptables rules enable them. And finally clean up stale hostports. +// 'newPortMapping' must also be present in 'activePodPortMappings'. +func (h *hostportSyncer) OpenPodHostportsAndSync(newPortMapping *PodPortMapping, natInterfaceName string, activePodPortMappings []*PodPortMapping) error { + // try to open pod host port if specified + if err := h.openHostports(newPortMapping); err != nil { + return err + } + + // Add the new pod to active pods if it's not present. + var found bool + for _, pm := range activePodPortMappings { + if pm.Namespace == newPortMapping.Namespace && pm.Name == newPortMapping.Name { + found = true + break + } + } + if !found { + activePodPortMappings = append(activePodPortMappings, newPortMapping) + } + + return h.SyncHostports(natInterfaceName, activePodPortMappings) +} + +// SyncHostports gathers all hostports on node and setup iptables rules enable them. And finally clean up stale hostports +func (h *hostportSyncer) SyncHostports(natInterfaceName string, activePodPortMappings []*PodPortMapping) error { + start := time.Now() + defer func() { + glog.V(4).Infof("syncHostportsRules took %v", time.Since(start)) + }() + + hostportPodMap, err := gatherAllHostports(activePodPortMappings) + if err != nil { + return err + } + + // Ensure KUBE-HOSTPORTS chains + ensureKubeHostportChains(h.iptables, natInterfaceName) + + // Get iptables-save output so we can check for existing chains and rules. 
+ // This will be a map of chain name to chain with rules as stored in iptables-save/iptables-restore + existingNATChains := make(map[utiliptables.Chain]string) + iptablesSaveRaw, err := h.iptables.Save(utiliptables.TableNAT) + if err != nil { // if we failed to get any rules + glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err) + } else { // otherwise parse the output + existingNATChains = utiliptables.GetChainLines(utiliptables.TableNAT, iptablesSaveRaw) + } + + natChains := bytes.NewBuffer(nil) + natRules := bytes.NewBuffer(nil) + writeLine(natChains, "*nat") + // Make sure we keep stats for the top-level chains, if they existed + // (which most should have because we created them above). + if chain, ok := existingNATChains[kubeHostportsChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(kubeHostportsChain)) + } + + // Accumulate NAT chains to keep. + activeNATChains := map[utiliptables.Chain]bool{} // use a map as a set + + for port, target := range hostportPodMap { + protocol := strings.ToLower(string(port.Protocol)) + hostportChain := hostportChainName(port, target.podFullName) + if chain, ok := existingNATChains[hostportChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(hostportChain)) + } + + activeNATChains[hostportChain] = true + + // Redirect to hostport chain + args := []string{ + "-A", string(kubeHostportsChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, target.podFullName, port.HostPort), + "-m", protocol, "-p", protocol, + "--dport", fmt.Sprintf("%d", port.HostPort), + "-j", string(hostportChain), + } + writeLine(natRules, args...) 
+ + // Assuming kubelet is syncing iptables KUBE-MARK-MASQ chain + // If the request comes from the pod that is serving the hostport, then SNAT + args = []string{ + "-A", string(hostportChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, target.podFullName, port.HostPort), + "-s", target.podIP, "-j", string(iptablesproxy.KubeMarkMasqChain), + } + writeLine(natRules, args...) + + // Create hostport chain to DNAT traffic to final destination + // IPTables will maintained the stats for this chain + args = []string{ + "-A", string(hostportChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s hostport %d"`, target.podFullName, port.HostPort), + "-m", protocol, "-p", protocol, + "-j", "DNAT", fmt.Sprintf("--to-destination=%s:%d", target.podIP, port.ContainerPort), + } + writeLine(natRules, args...) + } + + // Delete chains no longer in use. + for chain := range existingNATChains { + if !activeNATChains[chain] { + chainString := string(chain) + if !strings.HasPrefix(chainString, kubeHostportChainPrefix) { + // Ignore chains that aren't ours. + continue + } + // We must (as per iptables) write a chain-line for it, which has + // the nice effect of flushing the chain. Then we can remove the + // chain. + writeLine(natChains, existingNATChains[chain]) + writeLine(natRules, "-X", chainString) + } + } + writeLine(natRules, "COMMIT") + + natLines := append(natChains.Bytes(), natRules.Bytes()...) 
+ glog.V(3).Infof("Restoring iptables rules: %s", natLines) + err = h.iptables.RestoreAll(natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) + if err != nil { + return fmt.Errorf("Failed to execute iptables-restore: %v", err) + } + + h.cleanupHostportMap(hostportPodMap) + return nil +} + +// cleanupHostportMap closes obsolete hostports +func (h *hostportSyncer) cleanupHostportMap(containerPortMap map[*PortMapping]targetPod) { + // compute hostports that are supposed to be open + currentHostports := make(map[hostport]bool) + for containerPort := range containerPortMap { + hp := hostport{ + port: containerPort.HostPort, + protocol: strings.ToLower(string(containerPort.Protocol)), + } + currentHostports[hp] = true + } + + // close and delete obsolete hostports + for hp, socket := range h.hostPortMap { + if _, ok := currentHostports[hp]; !ok { + socket.Close() + glog.V(3).Infof("Closed local port %s", hp.String()) + delete(h.hostPortMap, hp) + } + } +} diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/doc.go b/vendor/k8s.io/kubernetes/pkg/proxy/doc.go new file mode 100644 index 00000000..3bed0fa3 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/proxy/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package proxy implements the layer-3 network proxy. 
+package proxy // import "k8s.io/kubernetes/pkg/proxy" diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/doc.go b/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/doc.go new file mode 100644 index 00000000..0a9ea094 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package healthcheck provides tools for serving kube-proxy healthchecks. +package healthcheck // import "k8s.io/kubernetes/pkg/proxy/healthcheck" diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/healthcheck.go b/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/healthcheck.go new file mode 100644 index 00000000..999bc1b8 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/proxy/healthcheck/healthcheck.go @@ -0,0 +1,235 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package healthcheck + +import ( + "fmt" + "net" + "net/http" + "strings" + "sync" + + "github.com/golang/glog" + "github.com/renstrom/dedent" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/pkg/api" + clientv1 "k8s.io/client-go/pkg/api/v1" + "k8s.io/client-go/tools/record" +) + +// Server serves HTTP endpoints for each service name, with results +// based on the endpoints. If there are 0 endpoints for a service, it returns a +// 503 "Service Unavailable" error (telling LBs not to use this node). If there +// are 1 or more endpoints, it returns a 200 "OK". +type Server interface { + // Make the new set of services be active. Services that were open before + // will be closed. Services that are new will be opened. Service that + // existed and are in the new set will be left alone. The value of the map + // is the healthcheck-port to listen on. + SyncServices(newServices map[types.NamespacedName]uint16) error + // Make the new set of endpoints be active. Endpoints for services that do + // not exist will be dropped. The value of the map is the number of + // endpoints the service has on this node. + SyncEndpoints(newEndpoints map[types.NamespacedName]int) error +} + +// Listener allows for testing of Server. If the Listener argument +// to NewServer() is nil, the real net.Listen function will be used. +type Listener interface { + // Listen is very much like net.Listen, except the first arg (network) is + // fixed to be "tcp". + Listen(addr string) (net.Listener, error) +} + +// HTTPServerFactory allows for testing of Server. If the +// HTTPServerFactory argument to NewServer() is nil, the real +// http.Server type will be used. +type HTTPServerFactory interface { + // New creates an instance of a type satisfying HTTPServer. This is + // designed to include http.Server. + New(addr string, handler http.Handler) HTTPServer +} + +// HTTPServer allows for testing of Server. 
+type HTTPServer interface { + // Server is designed so that http.Server satifies this interface, + Serve(listener net.Listener) error +} + +// NewServer allocates a new healthcheck server manager. If either +// of the injected arguments are nil, defaults will be used. +func NewServer(hostname string, recorder record.EventRecorder, listener Listener, httpServerFactory HTTPServerFactory) Server { + if listener == nil { + listener = stdNetListener{} + } + if httpServerFactory == nil { + httpServerFactory = stdHTTPServerFactory{} + } + return &server{ + hostname: hostname, + recorder: recorder, + listener: listener, + httpFactory: httpServerFactory, + services: map[types.NamespacedName]*hcInstance{}, + } +} + +// Implement Listener in terms of net.Listen. +type stdNetListener struct{} + +func (stdNetListener) Listen(addr string) (net.Listener, error) { + return net.Listen("tcp", addr) +} + +var _ Listener = stdNetListener{} + +// Implement HTTPServerFactory in terms of http.Server. +type stdHTTPServerFactory struct{} + +func (stdHTTPServerFactory) New(addr string, handler http.Handler) HTTPServer { + return &http.Server{ + Addr: addr, + Handler: handler, + } +} + +var _ HTTPServerFactory = stdHTTPServerFactory{} + +type server struct { + hostname string + recorder record.EventRecorder // can be nil + listener Listener + httpFactory HTTPServerFactory + + lock sync.Mutex + services map[types.NamespacedName]*hcInstance +} + +func (hcs *server) SyncServices(newServices map[types.NamespacedName]uint16) error { + hcs.lock.Lock() + defer hcs.lock.Unlock() + + // Remove any that are not needed any more. + for nsn, svc := range hcs.services { + if port, found := newServices[nsn]; !found || port != svc.port { + glog.V(2).Infof("Closing healthcheck %q on port %d", nsn.String(), svc.port) + if err := svc.listener.Close(); err != nil { + glog.Errorf("Close(%v): %v", svc.listener.Addr(), err) + } + delete(hcs.services, nsn) + } + } + + // Add any that are needed. 
+	for nsn, port := range newServices {
+		if hcs.services[nsn] != nil {
+			glog.V(3).Infof("Existing healthcheck %q on port %d", nsn.String(), port)
+			continue
+		}
+
+		glog.V(2).Infof("Opening healthcheck %q on port %d", nsn.String(), port)
+		svc := &hcInstance{port: port}
+		addr := fmt.Sprintf(":%d", port)
+		svc.server = hcs.httpFactory.New(addr, hcHandler{name: nsn, hcs: hcs})
+		var err error
+		svc.listener, err = hcs.listener.Listen(addr)
+		if err != nil {
+			msg := fmt.Sprintf("node %s failed to start healthcheck %q on port %d: %v", hcs.hostname, nsn.String(), port, err)
+
+			if hcs.recorder != nil {
+				hcs.recorder.Eventf(
+					&clientv1.ObjectReference{
+						Kind:      "Service",
+						Namespace: nsn.Namespace,
+						Name:      nsn.Name,
+						UID:       types.UID(nsn.String()),
+					}, api.EventTypeWarning, "FailedToStartHealthcheck", msg)
+			}
+			glog.Error(msg)
+			continue
+		}
+		hcs.services[nsn] = svc
+
+		go func(nsn types.NamespacedName, svc *hcInstance) {
+			// Serve() will exit when the listener is closed.
+			glog.V(3).Infof("Starting goroutine for healthcheck %q on port %d", nsn.String(), svc.port)
+			if err := svc.server.Serve(svc.listener); err != nil {
+				glog.V(3).Infof("Healthcheck %q closed: %v", nsn.String(), err)
+				return
+			}
+			glog.V(3).Infof("Healthcheck %q closed", nsn.String())
+		}(nsn, svc)
+	}
+	return nil
+}
+
+type hcInstance struct {
+	port      uint16
+	listener  net.Listener
+	server    HTTPServer
+	endpoints int // number of local endpoints for a service
+}
+
+type hcHandler struct {
+	name types.NamespacedName
+	hcs  *server
+}
+
+var _ http.Handler = hcHandler{}
+
+func (h hcHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	// The service may already have been removed by SyncServices; report that as 0 endpoints (503).
+	count, status := 0, http.StatusServiceUnavailable
+	h.hcs.lock.Lock()
+	if svc := h.hcs.services[h.name]; svc != nil && svc.endpoints != 0 {
+		count, status = svc.endpoints, http.StatusOK
+	}
+	h.hcs.lock.Unlock()
+
+	resp.Header().Set("Content-Type", "application/json")
+	resp.WriteHeader(status)
+	fmt.Fprint(resp, strings.Trim(dedent.Dedent(fmt.Sprintf(`
+		{
+			"service": {
+				"namespace": %q,
+				"name": %q
+			},
+			"localEndpoints": %d
+		}
+		`, h.name.Namespace, h.name.Name, count)), "\n"))
+}
+
+func (hcs *server) SyncEndpoints(newEndpoints map[types.NamespacedName]int) error {
+	hcs.lock.Lock()
+	defer hcs.lock.Unlock()
+
+	for nsn, count := range newEndpoints {
+		if hcs.services[nsn] == nil {
+			glog.V(3).Infof("Not saving endpoints for unknown healthcheck %q", nsn.String())
+			continue
+		}
+		glog.V(3).Infof("Reporting %d endpoints for healthcheck %q", count, nsn.String())
+		hcs.services[nsn].endpoints = count
+	}
+	for nsn, hci := range hcs.services {
+		if _, found := newEndpoints[nsn]; !found {
+			hci.endpoints = 0
+		}
+	}
+	return nil
+}
diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/iptables/proxier.go b/vendor/k8s.io/kubernetes/pkg/proxy/iptables/proxier.go
new file mode 100644
index 00000000..799a1510
--- /dev/null
+++ b/vendor/k8s.io/kubernetes/pkg/proxy/iptables/proxier.go
@@ -0,0 +1,1390 @@
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package iptables
+
+//
+// NOTE: this needs to be tested in e2e since it uses iptables for everything.
+// + +import ( + "bytes" + "crypto/sha256" + "encoding/base32" + "fmt" + "net" + "reflect" + "strconv" + "strings" + "sync" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/golang/glog" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + utilfeature "k8s.io/apiserver/pkg/util/feature" + clientv1 "k8s.io/client-go/pkg/api/v1" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/flowcontrol" + "k8s.io/kubernetes/pkg/api" + apiservice "k8s.io/kubernetes/pkg/api/service" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/proxy" + "k8s.io/kubernetes/pkg/proxy/healthcheck" + utilproxy "k8s.io/kubernetes/pkg/proxy/util" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" + utilsysctl "k8s.io/kubernetes/pkg/util/sysctl" + utilversion "k8s.io/kubernetes/pkg/util/version" +) + +const ( + // iptablesMinVersion is the minimum version of iptables for which we will use the Proxier + // from this package instead of the userspace Proxier. While most of the + // features we need were available earlier, the '-C' flag was added more + // recently. We use that indirectly in Ensure* functions, and if we don't + // have it, we have to be extra careful about the exact args we feed in being + // the same as the args we read back (iptables itself normalizes some args). + // This is the "new" Proxier, so we require "new" versions of tools. 
+ iptablesMinVersion = utiliptables.MinCheckVersion + + // the services chain + kubeServicesChain utiliptables.Chain = "KUBE-SERVICES" + + // the nodeports chain + kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS" + + // the kubernetes postrouting chain + kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING" + + // the mark-for-masquerade chain + KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ" + + // the mark-for-drop chain + KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP" +) + +// IPTablesVersioner can query the current iptables version. +type IPTablesVersioner interface { + // returns "X.Y.Z" + GetVersion() (string, error) +} + +// KernelCompatTester tests whether the required kernel capabilities are +// present to run the iptables proxier. +type KernelCompatTester interface { + IsCompatible() error +} + +// CanUseIPTablesProxier returns true if we should use the iptables Proxier +// instead of the "classic" userspace Proxier. This is determined by checking +// the iptables version and for the existence of kernel features. It may return +// an error if it fails to get the iptables version without error, in which +// case it will also return false. +func CanUseIPTablesProxier(iptver IPTablesVersioner, kcompat KernelCompatTester) (bool, error) { + minVersion, err := utilversion.ParseGeneric(iptablesMinVersion) + if err != nil { + return false, err + } + versionString, err := iptver.GetVersion() + if err != nil { + return false, err + } + version, err := utilversion.ParseGeneric(versionString) + if err != nil { + return false, err + } + if version.LessThan(minVersion) { + return false, nil + } + + // Check that the kernel supports what we need. + if err := kcompat.IsCompatible(); err != nil { + return false, err + } + return true, nil +} + +type LinuxKernelCompatTester struct{} + +func (lkct LinuxKernelCompatTester) IsCompatible() error { + // Check for the required sysctls. We don't care about the value, just + // that it exists. 
If this Proxier is chosen, we'll initialize it as we + // need. + _, err := utilsysctl.New().GetSysctl(sysctlRouteLocalnet) + return err +} + +const sysctlRouteLocalnet = "net/ipv4/conf/all/route_localnet" +const sysctlBridgeCallIPTables = "net/bridge/bridge-nf-call-iptables" + +// internal struct for string service information +type serviceInfo struct { + clusterIP net.IP + port int + protocol api.Protocol + nodePort int + loadBalancerStatus api.LoadBalancerStatus + sessionAffinityType api.ServiceAffinity + stickyMaxAgeMinutes int + externalIPs []string + loadBalancerSourceRanges []string + onlyNodeLocalEndpoints bool + healthCheckNodePort int +} + +// internal struct for endpoints information +type endpointsInfo struct { + endpoint string // TODO: should be an endpointString type + isLocal bool +} + +// returns a new serviceInfo struct +func newServiceInfo(serviceName proxy.ServicePortName, port *api.ServicePort, service *api.Service) *serviceInfo { + onlyNodeLocalEndpoints := apiservice.NeedsHealthCheck(service) && utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) && (service.Spec.Type == api.ServiceTypeLoadBalancer || service.Spec.Type == api.ServiceTypeNodePort) + info := &serviceInfo{ + clusterIP: net.ParseIP(service.Spec.ClusterIP), + port: int(port.Port), + protocol: port.Protocol, + nodePort: int(port.NodePort), + // Deep-copy in case the service instance changes + loadBalancerStatus: *api.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer), + sessionAffinityType: service.Spec.SessionAffinity, + stickyMaxAgeMinutes: 180, // TODO: paramaterize this in the API. 
+ externalIPs: make([]string, len(service.Spec.ExternalIPs)), + loadBalancerSourceRanges: make([]string, len(service.Spec.LoadBalancerSourceRanges)), + onlyNodeLocalEndpoints: onlyNodeLocalEndpoints, + } + copy(info.loadBalancerSourceRanges, service.Spec.LoadBalancerSourceRanges) + copy(info.externalIPs, service.Spec.ExternalIPs) + + if info.onlyNodeLocalEndpoints { + p := apiservice.GetServiceHealthCheckNodePort(service) + if p == 0 { + glog.Errorf("Service does not contain necessary annotation %v", + apiservice.BetaAnnotationHealthCheckNodePort) + } else { + info.healthCheckNodePort = int(p) + } + } + + return info +} + +type proxyServiceMap map[proxy.ServicePortName]*serviceInfo + +type proxyEndpointMap map[proxy.ServicePortName][]*endpointsInfo + +// Proxier is an iptables based proxy for connections between a localhost:lport +// and services that provide the actual backends. +type Proxier struct { + mu sync.Mutex // protects the following fields + serviceMap proxyServiceMap + endpointsMap proxyEndpointMap + portsMap map[localPort]closeable + haveReceivedServiceUpdate bool // true once we've seen an OnServiceUpdate event + allEndpoints []api.Endpoints // nil until we have seen an OnEndpointsUpdate event + throttle flowcontrol.RateLimiter + + // These are effectively const and do not need the mutex to be held. + syncPeriod time.Duration + minSyncPeriod time.Duration + iptables utiliptables.Interface + masqueradeAll bool + masqueradeMark string + exec utilexec.Interface + clusterCIDR string + hostname string + nodeIP net.IP + portMapper portOpener + recorder record.EventRecorder + healthChecker healthcheck.Server +} + +type localPort struct { + desc string + ip string + port int + protocol string +} + +func (lp *localPort) String() string { + return fmt.Sprintf("%q (%s:%d/%s)", lp.desc, lp.ip, lp.port, lp.protocol) +} + +type closeable interface { + Close() error +} + +// portOpener is an interface around port opening/closing. +// Abstracted out for testing. 
+type portOpener interface { + OpenLocalPort(lp *localPort) (closeable, error) +} + +// listenPortOpener opens ports by calling bind() and listen(). +type listenPortOpener struct{} + +// OpenLocalPort holds the given local port open. +func (l *listenPortOpener) OpenLocalPort(lp *localPort) (closeable, error) { + return openLocalPort(lp) +} + +// Proxier implements ProxyProvider +var _ proxy.ProxyProvider = &Proxier{} + +// NewProxier returns a new Proxier given an iptables Interface instance. +// Because of the iptables logic, it is assumed that there is only a single Proxier active on a machine. +// An error will be returned if iptables fails to update or acquire the initial lock. +// Once a proxier is created, it will keep iptables up to date in the background and +// will not terminate if a particular iptables call fails. +func NewProxier(ipt utiliptables.Interface, + sysctl utilsysctl.Interface, + exec utilexec.Interface, + syncPeriod time.Duration, + minSyncPeriod time.Duration, + masqueradeAll bool, + masqueradeBit int, + clusterCIDR string, + hostname string, + nodeIP net.IP, + recorder record.EventRecorder, +) (*Proxier, error) { + // check valid user input + if minSyncPeriod > syncPeriod { + return nil, fmt.Errorf("min-sync (%v) must be < sync(%v)", minSyncPeriod, syncPeriod) + } + + // Set the route_localnet sysctl we need for + if err := sysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil { + return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err) + } + + // Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers + // are connected to a Linux bridge (but not SDN bridges). Until most + // plugins handle this, log when config is missing + if val, err := sysctl.GetSysctl(sysctlBridgeCallIPTables); err == nil && val != 1 { + glog.Infof("missing br-netfilter module or unset sysctl br-nf-call-iptables; proxy may not work as intended") + } + + // Generate the masquerade mark to use for SNAT rules. 
+ if masqueradeBit < 0 || masqueradeBit > 31 { + return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit) + } + masqueradeValue := 1 << uint(masqueradeBit) + masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue) + + if nodeIP == nil { + glog.Warningf("invalid nodeIP, initializing kube-proxy with 127.0.0.1 as nodeIP") + nodeIP = net.ParseIP("127.0.0.1") + } + + if len(clusterCIDR) == 0 { + glog.Warningf("clusterCIDR not specified, unable to distinguish between internal and external traffic") + } + + healthChecker := healthcheck.NewServer(hostname, recorder, nil, nil) // use default implementations of deps + + var throttle flowcontrol.RateLimiter + // Defaulting back to not limit sync rate when minSyncPeriod is 0. + if minSyncPeriod != 0 { + syncsPerSecond := float32(time.Second) / float32(minSyncPeriod) + // The average use case will process 2 updates in short succession + throttle = flowcontrol.NewTokenBucketRateLimiter(syncsPerSecond, 2) + } + + return &Proxier{ + serviceMap: make(proxyServiceMap), + endpointsMap: make(proxyEndpointMap), + portsMap: make(map[localPort]closeable), + syncPeriod: syncPeriod, + minSyncPeriod: minSyncPeriod, + throttle: throttle, + iptables: ipt, + masqueradeAll: masqueradeAll, + masqueradeMark: masqueradeMark, + exec: exec, + clusterCIDR: clusterCIDR, + hostname: hostname, + nodeIP: nodeIP, + portMapper: &listenPortOpener{}, + recorder: recorder, + healthChecker: healthChecker, + }, nil +} + +// CleanupLeftovers removes all iptables rules and chains created by the Proxier +// It returns true if an error was encountered. Errors are logged. +func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) { + // Unlink the services chain. 
+ args := []string{ + "-m", "comment", "--comment", "kubernetes service portals", + "-j", string(kubeServicesChain), + } + tableChainsWithJumpServices := []struct { + table utiliptables.Table + chain utiliptables.Chain + }{ + {utiliptables.TableFilter, utiliptables.ChainInput}, + {utiliptables.TableFilter, utiliptables.ChainOutput}, + {utiliptables.TableNAT, utiliptables.ChainOutput}, + {utiliptables.TableNAT, utiliptables.ChainPrerouting}, + } + for _, tc := range tableChainsWithJumpServices { + if err := ipt.DeleteRule(tc.table, tc.chain, args...); err != nil { + if !utiliptables.IsNotFoundError(err) { + glog.Errorf("Error removing pure-iptables proxy rule: %v", err) + encounteredError = true + } + } + } + + // Unlink the postrouting chain. + args = []string{ + "-m", "comment", "--comment", "kubernetes postrouting rules", + "-j", string(kubePostroutingChain), + } + if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { + if !utiliptables.IsNotFoundError(err) { + glog.Errorf("Error removing pure-iptables proxy rule: %v", err) + encounteredError = true + } + } + + // Flush and remove all of our chains. + if iptablesSaveRaw, err := ipt.Save(utiliptables.TableNAT); err != nil { + glog.Errorf("Failed to execute iptables-save for %s: %v", utiliptables.TableNAT, err) + encounteredError = true + } else { + existingNATChains := utiliptables.GetChainLines(utiliptables.TableNAT, iptablesSaveRaw) + natChains := bytes.NewBuffer(nil) + natRules := bytes.NewBuffer(nil) + writeLine(natChains, "*nat") + // Start with chains we know we need to remove. + for _, chain := range []utiliptables.Chain{kubeServicesChain, kubeNodePortsChain, kubePostroutingChain, KubeMarkMasqChain} { + if _, found := existingNATChains[chain]; found { + chainString := string(chain) + writeLine(natChains, existingNATChains[chain]) // flush + writeLine(natRules, "-X", chainString) // delete + } + } + // Hunt for service and endpoint chains. 
+ for chain := range existingNATChains { + chainString := string(chain) + if strings.HasPrefix(chainString, "KUBE-SVC-") || strings.HasPrefix(chainString, "KUBE-SEP-") || strings.HasPrefix(chainString, "KUBE-FW-") || strings.HasPrefix(chainString, "KUBE-XLB-") { + writeLine(natChains, existingNATChains[chain]) // flush + writeLine(natRules, "-X", chainString) // delete + } + } + writeLine(natRules, "COMMIT") + natLines := append(natChains.Bytes(), natRules.Bytes()...) + // Write it. + err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) + if err != nil { + glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err) + encounteredError = true + } + } + { + filterBuf := bytes.NewBuffer(nil) + writeLine(filterBuf, "*filter") + writeLine(filterBuf, fmt.Sprintf(":%s - [0:0]", kubeServicesChain)) + writeLine(filterBuf, fmt.Sprintf("-X %s", kubeServicesChain)) + writeLine(filterBuf, "COMMIT") + // Write it. + if err := ipt.Restore(utiliptables.TableFilter, filterBuf.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil { + glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err) + encounteredError = true + } + } + return encounteredError +} + +// Sync is called to immediately synchronize the proxier state to iptables +func (proxier *Proxier) Sync() { + proxier.mu.Lock() + defer proxier.mu.Unlock() + proxier.syncProxyRules() +} + +// SyncLoop runs periodic work. This is expected to run as a goroutine or as the main loop of the app. It does not return. +func (proxier *Proxier) SyncLoop() { + t := time.NewTicker(proxier.syncPeriod) + defer t.Stop() + for { + <-t.C + glog.V(6).Infof("Periodic sync") + proxier.Sync() + } +} + +// Accepts a list of Services and the existing service map. Returns the new +// service map, a map of healthcheck ports, and a set of stale UDP +// services. 
+func buildServiceMap(allServices []api.Service, oldServiceMap proxyServiceMap) (proxyServiceMap, map[types.NamespacedName]uint16, sets.String) { + newServiceMap := make(proxyServiceMap) + hcPorts := make(map[types.NamespacedName]uint16) + + for i := range allServices { + service := &allServices[i] + svcName := types.NamespacedName{ + Namespace: service.Namespace, + Name: service.Name, + } + + // if ClusterIP is "None" or empty, skip proxying + if !api.IsServiceIPSet(service) { + glog.V(3).Infof("Skipping service %s due to clusterIP = %q", svcName, service.Spec.ClusterIP) + continue + } + // Even if ClusterIP is set, ServiceTypeExternalName services don't get proxied + if service.Spec.Type == api.ServiceTypeExternalName { + glog.V(3).Infof("Skipping service %s due to Type=ExternalName", svcName) + continue + } + + for i := range service.Spec.Ports { + servicePort := &service.Spec.Ports[i] + + serviceName := proxy.ServicePortName{ + NamespacedName: svcName, + Port: servicePort.Name, + } + + info := newServiceInfo(serviceName, servicePort, service) + oldInfo, exists := oldServiceMap[serviceName] + equal := reflect.DeepEqual(info, oldInfo) + if !exists { + glog.V(1).Infof("Adding new service %q at %s:%d/%s", serviceName, info.clusterIP, servicePort.Port, servicePort.Protocol) + } else if !equal { + glog.V(1).Infof("Updating existing service %q at %s:%d/%s", serviceName, info.clusterIP, servicePort.Port, servicePort.Protocol) + } + + if info.onlyNodeLocalEndpoints { + hcPorts[svcName] = uint16(info.healthCheckNodePort) + } + + newServiceMap[serviceName] = info + glog.V(4).Infof("added serviceInfo(%s): %s", serviceName, spew.Sdump(info)) + } + } + + for nsn, port := range hcPorts { + if port == 0 { + glog.Errorf("Service %q has no healthcheck nodeport", nsn) + delete(hcPorts, nsn) + } + } + + staleUDPServices := sets.NewString() + // Remove serviceports missing from the update. 
+ for name, info := range oldServiceMap { + if _, exists := newServiceMap[name]; !exists { + glog.V(1).Infof("Removing service %q", name) + if info.protocol == api.ProtocolUDP { + staleUDPServices.Insert(info.clusterIP.String()) + } + } + } + + return newServiceMap, hcPorts, staleUDPServices +} + +// OnServiceUpdate tracks the active set of service proxies. +// They will be synchronized using syncProxyRules() +func (proxier *Proxier) OnServiceUpdate(allServices []api.Service) { + start := time.Now() + defer func() { + glog.V(4).Infof("OnServiceUpdate took %v for %d services", time.Since(start), len(allServices)) + }() + proxier.mu.Lock() + defer proxier.mu.Unlock() + proxier.haveReceivedServiceUpdate = true + + newServiceMap, hcPorts, staleUDPServices := buildServiceMap(allServices, proxier.serviceMap) + + // update healthcheck ports + if err := proxier.healthChecker.SyncServices(hcPorts); err != nil { + glog.Errorf("Error syncing healtcheck ports: %v", err) + } + + if len(newServiceMap) != len(proxier.serviceMap) || !reflect.DeepEqual(newServiceMap, proxier.serviceMap) { + proxier.serviceMap = newServiceMap + proxier.syncProxyRules() + } else { + glog.V(4).Infof("Skipping proxy iptables rule sync on service update because nothing changed") + } + + utilproxy.DeleteServiceConnections(proxier.exec, staleUDPServices.List()) +} + +// OnEndpointsUpdate takes in a slice of updated endpoints. 
+func (proxier *Proxier) OnEndpointsUpdate(allEndpoints []api.Endpoints) {
+	proxier.mu.Lock()
+	defer proxier.mu.Unlock()
+	if proxier.allEndpoints == nil {
+		glog.V(2).Info("Received first Endpoints update")
+	}
+	proxier.allEndpoints = allEndpoints
+
+	// TODO: once service has made this same transform, move this into proxier.syncProxyRules()
+	newMap, hcEndpoints, staleConnections := updateEndpoints(proxier.allEndpoints, proxier.endpointsMap, proxier.hostname)
+
+	// update healthcheck endpoints
+	if err := proxier.healthChecker.SyncEndpoints(hcEndpoints); err != nil {
+		glog.Errorf("Error syncing healthcheck endoints: %v", err)
+	}
+
+	if len(newMap) != len(proxier.endpointsMap) || !reflect.DeepEqual(newMap, proxier.endpointsMap) {
+		proxier.endpointsMap = newMap
+		proxier.syncProxyRules()
+	} else {
+		glog.V(4).Infof("Skipping proxy iptables rule sync on endpoint update because nothing changed")
+	}
+
+	proxier.deleteEndpointConnections(staleConnections)
+}
+
+// updateEndpoints converts a slice of api.Endpoints into a map of service-port -> endpoints, a per-service count of local endpoint IPs, and the set of stale endpoints.
+func updateEndpoints(allEndpoints []api.Endpoints, curMap proxyEndpointMap, hostname string) (newMap proxyEndpointMap, hcEndpoints map[types.NamespacedName]int, staleSet map[endpointServicePair]bool) {
+
+	// return values
+	newMap = make(proxyEndpointMap)
+	hcEndpoints = make(map[types.NamespacedName]int)
+	staleSet = make(map[endpointServicePair]bool)
+
+	// Update endpoints for services.
+	for i := range allEndpoints {
+		accumulateEndpointsMap(&allEndpoints[i], hostname, curMap, &newMap)
+	}
+	// Check stale connections against endpoints missing from the update.
+	// TODO: we should really only mark a connection stale if the proto was UDP
+	// and the (ip, port, proto) was removed from the endpoints.
+	for svcPort, epList := range curMap {
+		for _, ep := range epList {
+			stale := true
+			for i := range newMap[svcPort] {
+				if *newMap[svcPort][i] == *ep {
+					stale = false
+					break
+				}
+			}
+			if stale {
+				glog.V(4).Infof("Stale endpoint %v -> %v", svcPort, ep.endpoint)
+				staleSet[endpointServicePair{endpoint: ep.endpoint, servicePortName: svcPort}] = true
+			}
+		}
+	}
+
+	if !utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) {
+		return
+	}
+
+	// accumulate local IPs per service, ignoring ports
+	localIPs := map[types.NamespacedName]sets.String{}
+	for svcPort := range newMap {
+		for _, ep := range newMap[svcPort] {
+			if ep.isLocal {
+				nsn := svcPort.NamespacedName
+				if localIPs[nsn] == nil {
+					localIPs[nsn] = sets.NewString()
+				}
+				ip, _, _ := net.SplitHostPort(ep.endpoint) // just the IP part (IPv6-safe); endpoint was built by net.JoinHostPort, so the error is ignored
+				localIPs[nsn].Insert(ip)
+			}
+		}
+	}
+	// produce a count per service
+	for nsn, ips := range localIPs {
+		hcEndpoints[nsn] = len(ips)
+	}
+
+	return newMap, hcEndpoints, staleSet
+}
+
+// Gather information about all the endpoint state for a given api.Endpoints.
+// This can not report complete info on stale connections because it has limited
+// scope - it only knows one Endpoints, but sees the whole current map. That
+// cleanup has to be done above.
+//
+// TODO: this could be simplified:
+// - hostPortInfo and endpointsInfo overlap too much
+// - the test for this is overlapped by the test for updateEndpoints
+// - naming is poor and responsibilities are muddled
+func accumulateEndpointsMap(endpoints *api.Endpoints, hostname string,
+	curEndpoints proxyEndpointMap,
+	newEndpoints *proxyEndpointMap) {
+
+	// We need to build a map of portname -> all ip:ports for that
+	// portname. Explode Endpoints.Subsets[*] into this structure.
+ for i := range endpoints.Subsets { + ss := &endpoints.Subsets[i] + for i := range ss.Ports { + port := &ss.Ports[i] + if port.Port == 0 { + glog.Warningf("ignoring invalid endpoint port %s", port.Name) + continue + } + svcPort := proxy.ServicePortName{ + NamespacedName: types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name}, + Port: port.Name, + } + for i := range ss.Addresses { + addr := &ss.Addresses[i] + if addr.IP == "" { + glog.Warningf("ignoring invalid endpoint port %s with empty host", port.Name) + continue + } + epInfo := &endpointsInfo{ + endpoint: net.JoinHostPort(addr.IP, strconv.Itoa(int(port.Port))), + isLocal: addr.NodeName != nil && *addr.NodeName == hostname, + } + (*newEndpoints)[svcPort] = append((*newEndpoints)[svcPort], epInfo) + } + } + } +} + +// portProtoHash takes the ServicePortName and protocol for a service +// returns the associated 16 character hash. This is computed by hashing (sha256) +// then encoding to base32 and truncating to 16 chars. We do this because IPTables +// Chain Names must be <= 28 chars long, and the longer they are the harder they are to read. +func portProtoHash(s proxy.ServicePortName, protocol string) string { + hash := sha256.Sum256([]byte(s.String() + protocol)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return encoded[:16] +} + +// servicePortChainName takes the ServicePortName for a service and +// returns the associated iptables chain. This is computed by hashing (sha256) +// then encoding to base32 and truncating with the prefix "KUBE-SVC-". +func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain { + return utiliptables.Chain("KUBE-SVC-" + portProtoHash(s, protocol)) +} + +// serviceFirewallChainName takes the ServicePortName for a service and +// returns the associated iptables chain. This is computed by hashing (sha256) +// then encoding to base32 and truncating with the prefix "KUBE-FW-". 
+func serviceFirewallChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain { + return utiliptables.Chain("KUBE-FW-" + portProtoHash(s, protocol)) +} + +// serviceLBPortChainName takes the ServicePortName for a service and +// returns the associated iptables chain. This is computed by hashing (sha256) +// then encoding to base32 and truncating with the prefix "KUBE-XLB-". We do +// this because IPTables Chain Names must be <= 28 chars long, and the longer +// they are the harder they are to read. +func serviceLBChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain { + return utiliptables.Chain("KUBE-XLB-" + portProtoHash(s, protocol)) +} + +// This is the same as servicePortChainName but with the endpoint included. +func servicePortEndpointChainName(s proxy.ServicePortName, protocol string, endpoint string) utiliptables.Chain { + hash := sha256.Sum256([]byte(s.String() + protocol + endpoint)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return utiliptables.Chain("KUBE-SEP-" + encoded[:16]) +} + +type endpointServicePair struct { + endpoint string + servicePortName proxy.ServicePortName +} + +const noConnectionToDelete = "0 flow entries have been deleted" + +// After a UDP endpoint has been removed, we must flush any pending conntrack entries to it, or else we +// risk sending more traffic to it, all of which will be lost (because UDP). 
+// This assumes the proxier mutex is held +func (proxier *Proxier) deleteEndpointConnections(connectionMap map[endpointServicePair]bool) { + for epSvcPair := range connectionMap { + if svcInfo, ok := proxier.serviceMap[epSvcPair.servicePortName]; ok && svcInfo.protocol == api.ProtocolUDP { + endpointIP := strings.Split(epSvcPair.endpoint, ":")[0] + glog.V(2).Infof("Deleting connection tracking state for service IP %s, endpoint IP %s", svcInfo.clusterIP.String(), endpointIP) + err := utilproxy.ExecConntrackTool(proxier.exec, "-D", "--orig-dst", svcInfo.clusterIP.String(), "--dst-nat", endpointIP, "-p", "udp") + if err != nil && !strings.Contains(err.Error(), noConnectionToDelete) { + // TODO: Better handling for deletion failure. When failure occur, stale udp connection may not get flushed. + // These stale udp connection will keep black hole traffic. Making this a best effort operation for now, since it + // is expensive to baby sit all udp connections to kubernetes services. + glog.Errorf("conntrack return with error: %v", err) + } + } + } +} + +// This is where all of the iptables-save/restore calls happen. +// The only other iptables rules are those that are setup in iptablesInit() +// assumes proxier.mu is held +func (proxier *Proxier) syncProxyRules() { + if proxier.throttle != nil { + proxier.throttle.Accept() + } + start := time.Now() + defer func() { + glog.V(4).Infof("syncProxyRules took %v", time.Since(start)) + }() + // don't sync rules till we've received services and endpoints + if proxier.allEndpoints == nil || !proxier.haveReceivedServiceUpdate { + glog.V(2).Info("Not syncing iptables until Services and Endpoints have been received from master") + return + } + glog.V(3).Infof("Syncing iptables rules") + + // Create and link the kube services chain. 
+ { + tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT} + for _, table := range tablesNeedServicesChain { + if _, err := proxier.iptables.EnsureChain(table, kubeServicesChain); err != nil { + glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, kubeServicesChain, err) + return + } + } + + tableChainsNeedJumpServices := []struct { + table utiliptables.Table + chain utiliptables.Chain + }{ + {utiliptables.TableFilter, utiliptables.ChainInput}, + {utiliptables.TableFilter, utiliptables.ChainOutput}, + {utiliptables.TableNAT, utiliptables.ChainOutput}, + {utiliptables.TableNAT, utiliptables.ChainPrerouting}, + } + comment := "kubernetes service portals" + args := []string{"-m", "comment", "--comment", comment, "-j", string(kubeServicesChain)} + for _, tc := range tableChainsNeedJumpServices { + if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil { + glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeServicesChain, err) + return + } + } + } + + // Create and link the kube postrouting chain. + { + if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, kubePostroutingChain); err != nil { + glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubePostroutingChain, err) + return + } + + comment := "kubernetes postrouting rules" + args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)} + if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { + glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err) + return + } + } + + // + // Below this point we will not return until we try to write the iptables rules. 
+ // + + // Get iptables-save output so we can check for existing chains and rules. + // This will be a map of chain name to chain with rules as stored in iptables-save/iptables-restore + existingFilterChains := make(map[utiliptables.Chain]string) + iptablesSaveRaw, err := proxier.iptables.Save(utiliptables.TableFilter) + if err != nil { // if we failed to get any rules + glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err) + } else { // otherwise parse the output + existingFilterChains = utiliptables.GetChainLines(utiliptables.TableFilter, iptablesSaveRaw) + } + + existingNATChains := make(map[utiliptables.Chain]string) + iptablesSaveRaw, err = proxier.iptables.Save(utiliptables.TableNAT) + if err != nil { // if we failed to get any rules + glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err) + } else { // otherwise parse the output + existingNATChains = utiliptables.GetChainLines(utiliptables.TableNAT, iptablesSaveRaw) + } + + filterChains := bytes.NewBuffer(nil) + filterRules := bytes.NewBuffer(nil) + natChains := bytes.NewBuffer(nil) + natRules := bytes.NewBuffer(nil) + + // Write table headers. + writeLine(filterChains, "*filter") + writeLine(natChains, "*nat") + + // Make sure we keep stats for the top-level chains, if they existed + // (which most should have because we created them above). 
+ if chain, ok := existingFilterChains[kubeServicesChain]; ok { + writeLine(filterChains, chain) + } else { + writeLine(filterChains, utiliptables.MakeChainLine(kubeServicesChain)) + } + if chain, ok := existingNATChains[kubeServicesChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(kubeServicesChain)) + } + if chain, ok := existingNATChains[kubeNodePortsChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(kubeNodePortsChain)) + } + if chain, ok := existingNATChains[kubePostroutingChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(kubePostroutingChain)) + } + if chain, ok := existingNATChains[KubeMarkMasqChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(KubeMarkMasqChain)) + } + + // Install the kubernetes-specific postrouting rules. We use a whole chain for + // this so that it is easier to flush and change, for example if the mark + // value should ever change. + writeLine(natRules, []string{ + "-A", string(kubePostroutingChain), + "-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`, + "-m", "mark", "--mark", proxier.masqueradeMark, + "-j", "MASQUERADE", + }...) + + // Install the kubernetes-specific masquerade mark rule. We use a whole chain for + // this so that it is easier to flush and change, for example if the mark + // value should ever change. + writeLine(natRules, []string{ + "-A", string(KubeMarkMasqChain), + "-j", "MARK", "--set-xmark", proxier.masqueradeMark, + }...) + + // Accumulate NAT chains to keep. + activeNATChains := map[utiliptables.Chain]bool{} // use a map as a set + + // Accumulate the set of local ports that we will be holding open once this update is complete + replacementPortsMap := map[localPort]closeable{} + + // Build rules for each service. 
+ for svcName, svcInfo := range proxier.serviceMap { + protocol := strings.ToLower(string(svcInfo.protocol)) + + // Create the per-service chain, retaining counters if possible. + svcChain := servicePortChainName(svcName, protocol) + if chain, ok := existingNATChains[svcChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(svcChain)) + } + activeNATChains[svcChain] = true + + svcXlbChain := serviceLBChainName(svcName, protocol) + if svcInfo.onlyNodeLocalEndpoints { + // Only for services with the externalTraffic annotation set to OnlyLocal + // create the per-service LB chain, retaining counters if possible. + if lbChain, ok := existingNATChains[svcXlbChain]; ok { + writeLine(natChains, lbChain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(svcXlbChain)) + } + activeNATChains[svcXlbChain] = true + } else if activeNATChains[svcXlbChain] { + // Cleanup the previously created XLB chain for this service + delete(activeNATChains, svcXlbChain) + } + + // Capture the clusterIP. + args := []string{ + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcName.String()), + "-m", protocol, "-p", protocol, + "-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()), + "--dport", fmt.Sprintf("%d", svcInfo.port), + } + if proxier.masqueradeAll { + writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...) + } + if len(proxier.clusterCIDR) > 0 { + writeLine(natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...) + } + writeLine(natRules, append(args, "-j", string(svcChain))...) + + // Capture externalIPs. + for _, externalIP := range svcInfo.externalIPs { + // If the "external" IP happens to be an IP that is local to this + // machine, hold the local port open so no other process can open it + // (because the socket might open but it would never work). 
+ if local, err := isLocalIP(externalIP); err != nil { + glog.Errorf("can't determine if IP is local, assuming not: %v", err) + } else if local { + lp := localPort{ + desc: "externalIP for " + svcName.String(), + ip: externalIP, + port: svcInfo.port, + protocol: protocol, + } + if proxier.portsMap[lp] != nil { + glog.V(4).Infof("Port %s was open before and is still needed", lp.String()) + replacementPortsMap[lp] = proxier.portsMap[lp] + } else { + socket, err := proxier.portMapper.OpenLocalPort(&lp) + if err != nil { + msg := fmt.Sprintf("can't open %s, skipping this externalIP: %v", lp.String(), err) + + proxier.recorder.Eventf( + &clientv1.ObjectReference{ + Kind: "Node", + Name: proxier.hostname, + UID: types.UID(proxier.hostname), + Namespace: "", + }, api.EventTypeWarning, err.Error(), msg) + glog.Error(msg) + continue + } + replacementPortsMap[lp] = socket + } + } // We're holding the port, so it's OK to install iptables rules. + args := []string{ + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcName.String()), + "-m", protocol, "-p", protocol, + "-d", fmt.Sprintf("%s/32", externalIP), + "--dport", fmt.Sprintf("%d", svcInfo.port), + } + // We have to SNAT packets to external IPs. + writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...) + + // Allow traffic for external IPs that does not come from a bridge (i.e. not from a container) + // nor from a local process to be forwarded to the service. + // This rule roughly translates to "all traffic from off-machine". + // This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later. + externalTrafficOnlyArgs := append(args, + "-m", "physdev", "!", "--physdev-is-in", + "-m", "addrtype", "!", "--src-type", "LOCAL") + writeLine(natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...) 
+ dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL") + // Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local. + // This covers cases like GCE load-balancers which get added to the local routing table. + writeLine(natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...) + } + + // Capture load-balancer ingress. + for _, ingress := range svcInfo.loadBalancerStatus.Ingress { + if ingress.IP != "" { + // create service firewall chain + fwChain := serviceFirewallChainName(svcName, protocol) + if chain, ok := existingNATChains[fwChain]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(fwChain)) + } + activeNATChains[fwChain] = true + // The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field. + // This currently works for loadbalancers that preserves source ips. + // For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply. + + args := []string{ + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()), + "-m", protocol, "-p", protocol, + "-d", fmt.Sprintf("%s/32", ingress.IP), + "--dport", fmt.Sprintf("%d", svcInfo.port), + } + // jump to service firewall chain + writeLine(natRules, append(args, "-j", string(fwChain))...) + + args = []string{ + "-A", string(fwChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()), + } + + // Each source match rule in the FW chain may jump to either the SVC or the XLB chain + chosenChain := svcXlbChain + // If we are proxying globally, we need to masquerade in case we cross nodes. + // If we are proxying only locally, we can retain the source IP. + if !svcInfo.onlyNodeLocalEndpoints { + writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...) 
+ chosenChain = svcChain + } + + if len(svcInfo.loadBalancerSourceRanges) == 0 { + // allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain + writeLine(natRules, append(args, "-j", string(chosenChain))...) + } else { + // firewall filter based on each source range + allowFromNode := false + for _, src := range svcInfo.loadBalancerSourceRanges { + writeLine(natRules, append(args, "-s", src, "-j", string(chosenChain))...) + // ignore error because it has been validated + _, cidr, _ := net.ParseCIDR(src) + if cidr.Contains(proxier.nodeIP) { + allowFromNode = true + } + } + // generally, ip route rule was added to intercept request to loadbalancer vip from the + // loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly. + // Need to add the following rule to allow request on host. + if allowFromNode { + writeLine(natRules, append(args, "-s", fmt.Sprintf("%s/32", ingress.IP), "-j", string(chosenChain))...) + } + } + + // If the packet was able to reach the end of firewall chain, then it did not get DNATed. + // It means the packet cannot go thru the firewall, then mark it for DROP + writeLine(natRules, append(args, "-j", string(KubeMarkDropChain))...) + } + } + + // Capture nodeports. If we had more than 2 rules it might be + // worthwhile to make a new per-service chain for nodeport rules, but + // with just 2 rules it ends up being a waste and a cognitive burden. + if svcInfo.nodePort != 0 { + // Hold the local port open so no other process can open it + // (because the socket might open but it would never work). 
+ lp := localPort{ + desc: "nodePort for " + svcName.String(), + ip: "", + port: svcInfo.nodePort, + protocol: protocol, + } + if proxier.portsMap[lp] != nil { + glog.V(4).Infof("Port %s was open before and is still needed", lp.String()) + replacementPortsMap[lp] = proxier.portsMap[lp] + } else { + socket, err := proxier.portMapper.OpenLocalPort(&lp) + if err != nil { + glog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err) + continue + } + if lp.protocol == "udp" { + proxier.clearUdpConntrackForPort(lp.port) + } + replacementPortsMap[lp] = socket + } // We're holding the port, so it's OK to install iptables rules. + + args := []string{ + "-A", string(kubeNodePortsChain), + "-m", "comment", "--comment", svcName.String(), + "-m", protocol, "-p", protocol, + "--dport", fmt.Sprintf("%d", svcInfo.nodePort), + } + if !svcInfo.onlyNodeLocalEndpoints { + // Nodeports need SNAT, unless they're local. + writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...) + // Jump to the service chain. + writeLine(natRules, append(args, "-j", string(svcChain))...) + } else { + // TODO: Make all nodePorts jump to the firewall chain. + // Currently we only create it for loadbalancers (#33586). + writeLine(natRules, append(args, "-j", string(svcXlbChain))...) + } + + // If the service has no endpoints then reject packets. The filter + // table doesn't currently have the same per-service structure that + // the nat table does, so we just stick this into the kube-services + // chain. + if len(proxier.endpointsMap[svcName]) == 0 { + writeLine(filterRules, + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcName.String()), + "-m", "addrtype", "--dst-type", "LOCAL", + "-m", protocol, "-p", protocol, + "--dport", fmt.Sprintf("%d", svcInfo.nodePort), + "-j", "REJECT", + ) + } + } + + // If the service has no endpoints then reject packets. 
+ if len(proxier.endpointsMap[svcName]) == 0 { + writeLine(filterRules, + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcName.String()), + "-m", protocol, "-p", protocol, + "-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()), + "--dport", fmt.Sprintf("%d", svcInfo.port), + "-j", "REJECT", + ) + continue + } + + // From here on, we assume there are active endpoints. + + // Generate the per-endpoint chains. We do this in multiple passes so we + // can group rules together. + // These two slices parallel each other - keep in sync + endpoints := make([]*endpointsInfo, 0) + endpointChains := make([]utiliptables.Chain, 0) + for _, ep := range proxier.endpointsMap[svcName] { + endpoints = append(endpoints, ep) + endpointChain := servicePortEndpointChainName(svcName, protocol, ep.endpoint) + endpointChains = append(endpointChains, endpointChain) + + // Create the endpoint chain, retaining counters if possible. + if chain, ok := existingNATChains[utiliptables.Chain(endpointChain)]; ok { + writeLine(natChains, chain) + } else { + writeLine(natChains, utiliptables.MakeChainLine(endpointChain)) + } + activeNATChains[endpointChain] = true + } + + // First write session affinity rules, if applicable. + if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP { + for _, endpointChain := range endpointChains { + writeLine(natRules, + "-A", string(svcChain), + "-m", "comment", "--comment", svcName.String(), + "-m", "recent", "--name", string(endpointChain), + "--rcheck", "--seconds", fmt.Sprintf("%d", svcInfo.stickyMaxAgeMinutes*60), "--reap", + "-j", string(endpointChain)) + } + } + + // Now write loadbalancing & DNAT rules. + n := len(endpointChains) + for i, endpointChain := range endpointChains { + // Balancing rules in the per-service chain. + args := []string{ + "-A", string(svcChain), + "-m", "comment", "--comment", svcName.String(), + } + if i < (n - 1) { + // Each rule is a probabilistic match. 
+ args = append(args, + "-m", "statistic", + "--mode", "random", + "--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i))) + } + // The final (or only if n == 1) rule is a guaranteed match. + args = append(args, "-j", string(endpointChain)) + writeLine(natRules, args...) + + // Rules in the per-endpoint chain. + args = []string{ + "-A", string(endpointChain), + "-m", "comment", "--comment", svcName.String(), + } + // Handle traffic that loops back to the originator with SNAT. + writeLine(natRules, append(args, + "-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i].endpoint, ":")[0]), + "-j", string(KubeMarkMasqChain))...) + // Update client-affinity lists. + if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP { + args = append(args, "-m", "recent", "--name", string(endpointChain), "--set") + } + // DNAT to final destination. + args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", endpoints[i].endpoint) + writeLine(natRules, args...) + } + + // The logic below this applies only if this service is marked as OnlyLocal + if !svcInfo.onlyNodeLocalEndpoints { + continue + } + + // Now write ingress loadbalancing & DNAT rules only for services that have a localOnly annotation + // TODO - This logic may be combinable with the block above that creates the svc balancer chain + localEndpoints := make([]*endpointsInfo, 0) + localEndpointChains := make([]utiliptables.Chain, 0) + for i := range endpointChains { + if endpoints[i].isLocal { + // These slices parallel each other; must be kept in sync + localEndpoints = append(localEndpoints, endpoints[i]) + localEndpointChains = append(localEndpointChains, endpointChains[i]) + } + } + // First rule in the chain redirects all pod -> external vip traffic to the + // Service's ClusterIP instead. 
This happens whether or not we have local + // endpoints; only if clusterCIDR is specified + if len(proxier.clusterCIDR) > 0 { + args = []string{ + "-A", string(svcXlbChain), + "-m", "comment", "--comment", + fmt.Sprintf(`"Redirect pods trying to reach external loadbalancer VIP to clusterIP"`), + "-s", proxier.clusterCIDR, + "-j", string(svcChain), + } + writeLine(natRules, args...) + } + + numLocalEndpoints := len(localEndpointChains) + if numLocalEndpoints == 0 { + // Blackhole all traffic since there are no local endpoints + args := []string{ + "-A", string(svcXlbChain), + "-m", "comment", "--comment", + fmt.Sprintf(`"%s has no local endpoints"`, svcName.String()), + "-j", + string(KubeMarkDropChain), + } + writeLine(natRules, args...) + } else { + // Setup probability filter rules only over local endpoints + for i, endpointChain := range localEndpointChains { + // Balancing rules in the per-service chain. + args := []string{ + "-A", string(svcXlbChain), + "-m", "comment", "--comment", + fmt.Sprintf(`"Balancing rule %d for %s"`, i, svcName.String()), + } + if i < (numLocalEndpoints - 1) { + // Each rule is a probabilistic match. + args = append(args, + "-m", "statistic", + "--mode", "random", + "--probability", fmt.Sprintf("%0.5f", 1.0/float64(numLocalEndpoints-i))) + } + // The final (or only if n == 1) rule is a guaranteed match. + args = append(args, "-j", string(endpointChain)) + writeLine(natRules, args...) + } + } + } + + // Delete chains no longer in use. + for chain := range existingNATChains { + if !activeNATChains[chain] { + chainString := string(chain) + if !strings.HasPrefix(chainString, "KUBE-SVC-") && !strings.HasPrefix(chainString, "KUBE-SEP-") && !strings.HasPrefix(chainString, "KUBE-FW-") && !strings.HasPrefix(chainString, "KUBE-XLB-") { + // Ignore chains that aren't ours. + continue + } + // We must (as per iptables) write a chain-line for it, which has + // the nice effect of flushing the chain. Then we can remove the + // chain. 
+ writeLine(natChains, existingNATChains[chain]) + writeLine(natRules, "-X", chainString) + } + } + + // Finally, tail-call to the nodeports chain. This needs to be after all + // other service portal rules. + writeLine(natRules, + "-A", string(kubeServicesChain), + "-m", "comment", "--comment", `"kubernetes service nodeports; NOTE: this must be the last rule in this chain"`, + "-m", "addrtype", "--dst-type", "LOCAL", + "-j", string(kubeNodePortsChain)) + + // Write the end-of-table markers. + writeLine(filterRules, "COMMIT") + writeLine(natRules, "COMMIT") + + // Sync rules. + // NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table. + filterLines := append(filterChains.Bytes(), filterRules.Bytes()...) + natLines := append(natChains.Bytes(), natRules.Bytes()...) + lines := append(filterLines, natLines...) + + glog.V(3).Infof("Restoring iptables rules: %s", lines) + err = proxier.iptables.RestoreAll(lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) + if err != nil { + glog.Errorf("Failed to execute iptables-restore: %v\nRules:\n%s", err, lines) + // Revert new local ports. + revertPorts(replacementPortsMap, proxier.portsMap) + return + } + + // Close old local ports and save new ones. + for k, v := range proxier.portsMap { + if replacementPortsMap[k] == nil { + v.Close() + } + } + proxier.portsMap = replacementPortsMap +} + +// Clear UDP conntrack for port or all conntrack entries when port equal zero. +// When a packet arrives, it will not go through NAT table again, because it is not "the first" packet. +// The solution is clearing the conntrack. 
Known issus: +// https://github.com/docker/docker/issues/8795 +// https://github.com/kubernetes/kubernetes/issues/31983 +func (proxier *Proxier) clearUdpConntrackForPort(port int) { + glog.V(2).Infof("Deleting conntrack entries for udp connections") + if port > 0 { + err := utilproxy.ExecConntrackTool(proxier.exec, "-D", "-p", "udp", "--dport", strconv.Itoa(port)) + if err != nil && !strings.Contains(err.Error(), noConnectionToDelete) { + glog.Errorf("conntrack return with error: %v", err) + } + } else { + glog.Errorf("Wrong port number. The port number must be greater than zero") + } +} + +// Join all words with spaces, terminate with newline and write to buf. +func writeLine(buf *bytes.Buffer, words ...string) { + buf.WriteString(strings.Join(words, " ") + "\n") +} + +func isLocalIP(ip string) (bool, error) { + addrs, err := net.InterfaceAddrs() + if err != nil { + return false, err + } + for i := range addrs { + intf, _, err := net.ParseCIDR(addrs[i].String()) + if err != nil { + return false, err + } + if net.ParseIP(ip).Equal(intf) { + return true, nil + } + } + return false, nil +} + +func openLocalPort(lp *localPort) (closeable, error) { + // For ports on node IPs, open the actual port and hold it, even though we + // use iptables to redirect traffic. + // This ensures a) that it's safe to use that port and b) that (a) stays + // true. The risk is that some process on the node (e.g. sshd or kubelet) + // is using a port and we give that same port out to a Service. That would + // be bad because iptables would silently claim the traffic but the process + // would never know. + // NOTE: We should not need to have a real listen()ing socket - bind() + // should be enough, but I can't figure out a way to e2e test without + // it. Tools like 'ss' and 'netstat' do not show sockets that are + // bind()ed but not listen()ed, and at least the default debian netcat + // has no way to avoid about 10 seconds of retries. 
+ var socket closeable + switch lp.protocol { + case "tcp": + listener, err := net.Listen("tcp", net.JoinHostPort(lp.ip, strconv.Itoa(lp.port))) + if err != nil { + return nil, err + } + socket = listener + case "udp": + addr, err := net.ResolveUDPAddr("udp", net.JoinHostPort(lp.ip, strconv.Itoa(lp.port))) + if err != nil { + return nil, err + } + conn, err := net.ListenUDP("udp", addr) + if err != nil { + return nil, err + } + socket = conn + default: + return nil, fmt.Errorf("unknown protocol %q", lp.protocol) + } + glog.V(2).Infof("Opened local port %s", lp.String()) + return socket, nil +} + +// revertPorts is closing ports in replacementPortsMap but not in originalPortsMap. In other words, it only +// closes the ports opened in this sync. +func revertPorts(replacementPortsMap, originalPortsMap map[localPort]closeable) { + for k, v := range replacementPortsMap { + // Only close newly opened local ports - leave ones that were open before this update + if originalPortsMap[k] == nil { + glog.V(2).Infof("Closing local port %s after iptables-restore failure", k.String()) + v.Close() + } + } +} diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/types.go b/vendor/k8s.io/kubernetes/pkg/proxy/types.go new file mode 100644 index 00000000..d9ff569c --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/proxy/types.go @@ -0,0 +1,49 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package proxy + +import ( + "fmt" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/kubernetes/pkg/api" +) + +// ProxyProvider is the interface provided by proxier implementations. +type ProxyProvider interface { + // OnServiceUpdate manages the active set of service proxies. + // Active service proxies are reinitialized if found in the update set or + // removed if missing from the update set. + OnServiceUpdate(services []api.Service) + // Sync immediately synchronizes the ProxyProvider's current state to iptables. + Sync() + // SyncLoop runs periodic work. + // This is expected to run as a goroutine or as the main loop of the app. + // It does not return. + SyncLoop() +} + +// ServicePortName carries a namespace + name + portname. This is the unique +// identifier for a load-balanced service. +type ServicePortName struct { + types.NamespacedName + Port string +} + +func (spn ServicePortName) String() string { + return fmt.Sprintf("%s:%s", spn.NamespacedName.String(), spn.Port) +} diff --git a/vendor/k8s.io/kubernetes/pkg/proxy/util/conntrack.go b/vendor/k8s.io/kubernetes/pkg/proxy/util/conntrack.go new file mode 100644 index 00000000..436045ec --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/proxy/util/conntrack.go @@ -0,0 +1,58 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package util + +import ( + "fmt" + "strings" + + "k8s.io/kubernetes/pkg/util/exec" + + "github.com/golang/glog" +) + +// Utilities for dealing with conntrack + +const noConnectionToDelete = "0 flow entries have been deleted" + +// DeleteServiceConnections uses the conntrack tool to delete the conntrack entries +// for the UDP connections specified by the given service IPs +func DeleteServiceConnections(execer exec.Interface, svcIPs []string) { + for _, ip := range svcIPs { + glog.V(2).Infof("Deleting connection tracking state for service IP %s", ip) + err := ExecConntrackTool(execer, "-D", "--orig-dst", ip, "-p", "udp") + if err != nil && !strings.Contains(err.Error(), noConnectionToDelete) { + // TODO: Better handling for deletion failure. When a failure occurs, stale udp connections may not get flushed. + // These stale udp connections will keep black-holing traffic. Making this a best effort operation for now, since it + // is expensive to baby-sit all udp connections to kubernetes services. + glog.Errorf("conntrack returned error: %v", err) + } + } +} + +// ExecConntrackTool executes the conntrack tool using the given parameters +func ExecConntrackTool(execer exec.Interface, parameters ...string) error { + conntrackPath, err := execer.LookPath("conntrack") + if err != nil { + return fmt.Errorf("error looking for path of conntrack: %v", err) + } + output, err := execer.Command(conntrackPath, parameters...).CombinedOutput() + if err != nil { + return fmt.Errorf("conntrack command returned: %q, error message: %s", string(output), err) + } + return nil +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/dbus/dbus.go b/vendor/k8s.io/kubernetes/pkg/util/dbus/dbus.go new file mode 100644 index 00000000..702d16e5 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/dbus/dbus.go @@ -0,0 +1,133 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dbus + +import ( + godbus "github.com/godbus/dbus" +) + +// Interface is an interface that presents a subset of the godbus/dbus API. Use this +// when you want to inject fakeable/mockable D-Bus behavior. +type Interface interface { + // SystemBus returns a connection to the system bus, connecting to it + // first if necessary + SystemBus() (Connection, error) + // SessionBus returns a connection to the session bus, connecting to it + // first if necessary + SessionBus() (Connection, error) +} + +// Connection represents a D-Bus connection +type Connection interface { + // Returns an Object representing the bus itself + BusObject() Object + + // Object creates a representation of a remote D-Bus object + Object(name, path string) Object + + // Signal registers or unregisters a channel to receive D-Bus signals + Signal(ch chan<- *godbus.Signal) +} + +// Object represents a remote D-Bus object +type Object interface { + // Call synchronously calls a D-Bus method + Call(method string, flags godbus.Flags, args ...interface{}) Call +} + +// Call represents a pending or completed D-Bus method call +type Call interface { + // Store returns a completed call's return values, or an error + Store(retvalues ...interface{}) error +} + +// Implements Interface in terms of actually talking to D-Bus +type dbusImpl struct { + systemBus *connImpl + sessionBus *connImpl +} + +// Implements Connection as a godbus.Conn +type connImpl struct { + conn *godbus.Conn +} + +// Implements Object as a godbus.Object +type objectImpl struct { + object godbus.BusObject +} + +// Implements 
Call as a godbus.Call +type callImpl struct { + call *godbus.Call +} + +// New returns a new Interface which will use godbus to talk to D-Bus +func New() Interface { + return &dbusImpl{} +} + +// SystemBus is part of Interface +func (db *dbusImpl) SystemBus() (Connection, error) { + if db.systemBus == nil { + bus, err := godbus.SystemBus() + if err != nil { + return nil, err + } + db.systemBus = &connImpl{bus} + } + + return db.systemBus, nil +} + +// SessionBus is part of Interface +func (db *dbusImpl) SessionBus() (Connection, error) { + if db.sessionBus == nil { + bus, err := godbus.SessionBus() + if err != nil { + return nil, err + } + db.sessionBus = &connImpl{bus} + } + + return db.sessionBus, nil +} + +// BusObject is part of the Connection interface +func (conn *connImpl) BusObject() Object { + return &objectImpl{conn.conn.BusObject()} +} + +// Object is part of the Connection interface +func (conn *connImpl) Object(name, path string) Object { + return &objectImpl{conn.conn.Object(name, godbus.ObjectPath(path))} +} + +// Signal is part of the Connection interface +func (conn *connImpl) Signal(ch chan<- *godbus.Signal) { + conn.conn.Signal(ch) +} + +// Call is part of the Object interface +func (obj *objectImpl) Call(method string, flags godbus.Flags, args ...interface{}) Call { + return &callImpl{obj.object.Call(method, flags, args...)} +} + +// Store is part of the Call interface +func (call *callImpl) Store(retvalues ...interface{}) error { + return call.call.Store(retvalues...) +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/dbus/doc.go b/vendor/k8s.io/kubernetes/pkg/util/dbus/doc.go new file mode 100644 index 00000000..b07da628 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/dbus/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package dbus provides an injectable interface and implementations for D-Bus communication +package dbus // import "k8s.io/kubernetes/pkg/util/dbus" diff --git a/vendor/k8s.io/kubernetes/pkg/util/dbus/fake_dbus.go b/vendor/k8s.io/kubernetes/pkg/util/dbus/fake_dbus.go new file mode 100644 index 00000000..44131272 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/dbus/fake_dbus.go @@ -0,0 +1,135 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dbus + +import ( + "fmt" + + godbus "github.com/godbus/dbus" +) + +// DBusFake is a simple fake Interface type. 
+type DBusFake struct { + systemBus *DBusFakeConnection + sessionBus *DBusFakeConnection +} + +// DBusFakeConnection represents a fake D-Bus connection +type DBusFakeConnection struct { + busObject *fakeObject + objects map[string]*fakeObject + signalHandlers []chan<- *godbus.Signal +} + +// DBusFakeHandler is used to handle fake D-Bus method calls +type DBusFakeHandler func(method string, args ...interface{}) ([]interface{}, error) + +type fakeObject struct { + handler DBusFakeHandler +} + +type fakeCall struct { + ret []interface{} + err error +} + +// NewFake returns a new Interface which will fake talking to D-Bus +func NewFake(systemBus *DBusFakeConnection, sessionBus *DBusFakeConnection) *DBusFake { + return &DBusFake{systemBus, sessionBus} +} + +func NewFakeConnection() *DBusFakeConnection { + return &DBusFakeConnection{ + objects: make(map[string]*fakeObject), + } +} + +// SystemBus is part of Interface +func (db *DBusFake) SystemBus() (Connection, error) { + if db.systemBus != nil { + return db.systemBus, nil + } else { + return nil, fmt.Errorf("DBus is not running") + } +} + +// SessionBus is part of Interface +func (db *DBusFake) SessionBus() (Connection, error) { + if db.sessionBus != nil { + return db.sessionBus, nil + } else { + return nil, fmt.Errorf("DBus is not running") + } +} + +// BusObject is part of the Connection interface +func (conn *DBusFakeConnection) BusObject() Object { + return conn.busObject +} + +// Object is part of the Connection interface +func (conn *DBusFakeConnection) Object(name, path string) Object { + return conn.objects[name+path] +} + +// Signal is part of the Connection interface +func (conn *DBusFakeConnection) Signal(ch chan<- *godbus.Signal) { + for i := range conn.signalHandlers { + if conn.signalHandlers[i] == ch { + conn.signalHandlers = append(conn.signalHandlers[:i], conn.signalHandlers[i+1:]...) 
+ return + } + } + conn.signalHandlers = append(conn.signalHandlers, ch) +} + +// SetBusObject sets the handler for the BusObject of conn +func (conn *DBusFakeConnection) SetBusObject(handler DBusFakeHandler) { + conn.busObject = &fakeObject{handler} +} + +// AddObject adds a handler for the Object at name and path +func (conn *DBusFakeConnection) AddObject(name, path string, handler DBusFakeHandler) { + conn.objects[name+path] = &fakeObject{handler} +} + +// EmitSignal emits a signal on conn +func (conn *DBusFakeConnection) EmitSignal(name, path, iface, signal string, args ...interface{}) { + sig := &godbus.Signal{ + Sender: name, + Path: godbus.ObjectPath(path), + Name: iface + "." + signal, + Body: args, + } + for _, ch := range conn.signalHandlers { + ch <- sig + } +} + +// Call is part of the Object interface +func (obj *fakeObject) Call(method string, flags godbus.Flags, args ...interface{}) Call { + ret, err := obj.handler(method, args...) + return &fakeCall{ret, err} +} + +// Store is part of the Call interface +func (call *fakeCall) Store(retvalues ...interface{}) error { + if call.err != nil { + return call.err + } + return godbus.Store(call.ret, retvalues...) +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/iptables/doc.go b/vendor/k8s.io/kubernetes/pkg/util/iptables/doc.go new file mode 100644 index 00000000..f2649829 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/iptables/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package iptables provides an interface and implementations for running iptables commands. +package iptables // import "k8s.io/kubernetes/pkg/util/iptables" diff --git a/vendor/k8s.io/kubernetes/pkg/util/iptables/iptables.go b/vendor/k8s.io/kubernetes/pkg/util/iptables/iptables.go new file mode 100644 index 00000000..1b09ff77 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/iptables/iptables.go @@ -0,0 +1,581 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package iptables + +import ( + "bytes" + "fmt" + "regexp" + "strings" + "sync" + + godbus "github.com/godbus/dbus" + "github.com/golang/glog" + "k8s.io/apimachinery/pkg/util/sets" + utildbus "k8s.io/kubernetes/pkg/util/dbus" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utilversion "k8s.io/kubernetes/pkg/util/version" +) + +type RulePosition string + +const ( + Prepend RulePosition = "-I" + Append RulePosition = "-A" +) + +// An injectable interface for running iptables commands. Implementations must be goroutine-safe. +type Interface interface { + // GetVersion returns the "X.Y.Z" version string for iptables. + GetVersion() (string, error) + // EnsureChain checks if the specified chain exists and, if not, creates it. If the chain existed, return true. + EnsureChain(table Table, chain Chain) (bool, error) + // FlushChain clears the specified chain. 
If the chain did not exist, return error. + FlushChain(table Table, chain Chain) error + // DeleteChain deletes the specified chain. If the chain did not exist, return error. + DeleteChain(table Table, chain Chain) error + // EnsureRule checks if the specified rule is present and, if not, creates it. If the rule existed, return true. + EnsureRule(position RulePosition, table Table, chain Chain, args ...string) (bool, error) + // DeleteRule checks if the specified rule is present and, if so, deletes it. + DeleteRule(table Table, chain Chain, args ...string) error + // IsIpv6 returns true if this is managing ipv6 tables + IsIpv6() bool + // Save calls `iptables-save` for table. + Save(table Table) ([]byte, error) + // SaveAll calls `iptables-save`. + SaveAll() ([]byte, error) + // Restore runs `iptables-restore` passing data through []byte. + // table is the Table to restore + // data should be formatted like the output of Save() + // flush sets the presence of the "--noflush" flag. see: FlushFlag + // counters sets the "--counters" flag. see: RestoreCountersFlag + Restore(table Table, data []byte, flush FlushFlag, counters RestoreCountersFlag) error + // RestoreAll is the same as Restore except that no table is specified. 
+ RestoreAll(data []byte, flush FlushFlag, counters RestoreCountersFlag) error + // AddReloadFunc adds a function to call on iptables reload + AddReloadFunc(reloadFunc func()) + // Destroy cleans up resources used by the Interface + Destroy() +} + +type Protocol byte + +const ( + ProtocolIpv4 Protocol = iota + 1 + ProtocolIpv6 +) + +type Table string + +const ( + TableNAT Table = "nat" + TableFilter Table = "filter" +) + +type Chain string + +const ( + ChainPostrouting Chain = "POSTROUTING" + ChainPrerouting Chain = "PREROUTING" + ChainOutput Chain = "OUTPUT" + ChainInput Chain = "INPUT" +) + +const ( + cmdIPTablesSave string = "iptables-save" + cmdIPTablesRestore string = "iptables-restore" + cmdIPTables string = "iptables" + cmdIp6tables string = "ip6tables" +) + +// Option flag for Restore +type RestoreCountersFlag bool + +const RestoreCounters RestoreCountersFlag = true +const NoRestoreCounters RestoreCountersFlag = false + +// Option flag for Flush +type FlushFlag bool + +const FlushTables FlushFlag = true +const NoFlushTables FlushFlag = false + +// Versions of iptables less than this do not support the -C / --check flag +// (test whether a rule exists). +const MinCheckVersion = "1.4.11" + +// Minimum iptables versions supporting the -w and -w2 flags +const MinWaitVersion = "1.4.20" +const MinWait2Version = "1.4.22" + +// runner implements Interface in terms of exec("iptables"). +type runner struct { + mu sync.Mutex + exec utilexec.Interface + dbus utildbus.Interface + protocol Protocol + hasCheck bool + waitFlag []string + + reloadFuncs []func() + signal chan *godbus.Signal +} + +// New returns a new Interface which will exec iptables. 
+func New(exec utilexec.Interface, dbus utildbus.Interface, protocol Protocol) Interface { + vstring, err := getIPTablesVersionString(exec) + if err != nil { + glog.Warningf("Error checking iptables version, assuming version at least %s: %v", MinCheckVersion, err) + vstring = MinCheckVersion + } + runner := &runner{ + exec: exec, + dbus: dbus, + protocol: protocol, + hasCheck: getIPTablesHasCheckCommand(vstring), + waitFlag: getIPTablesWaitFlag(vstring), + } + runner.connectToFirewallD() + return runner +} + +// Destroy is part of Interface. +func (runner *runner) Destroy() { + if runner.signal != nil { + runner.signal <- nil + } +} + +const ( + firewalldName = "org.fedoraproject.FirewallD1" + firewalldPath = "/org/fedoraproject/FirewallD1" + firewalldInterface = "org.fedoraproject.FirewallD1" +) + +// Connects to D-Bus and listens for FirewallD start/restart. (On non-FirewallD-using +// systems, this is effectively a no-op; we listen for the signals, but they will never be +// emitted, so reload() will never be called.) +func (runner *runner) connectToFirewallD() { + bus, err := runner.dbus.SystemBus() + if err != nil { + glog.V(1).Infof("Could not connect to D-Bus system bus: %s", err) + return + } + + rule := fmt.Sprintf("type='signal',sender='%s',path='%s',interface='%s',member='Reloaded'", firewalldName, firewalldPath, firewalldInterface) + bus.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, rule) + + rule = fmt.Sprintf("type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged',path='/org/freedesktop/DBus',sender='org.freedesktop.DBus',arg0='%s'", firewalldName) + bus.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, rule) + + runner.signal = make(chan *godbus.Signal, 10) + bus.Signal(runner.signal) + + go runner.dbusSignalHandler(bus) +} + +// GetVersion returns the version string. +func (runner *runner) GetVersion() (string, error) { + return getIPTablesVersionString(runner.exec) +} + +// EnsureChain is part of Interface. 
+func (runner *runner) EnsureChain(table Table, chain Chain) (bool, error) { + fullArgs := makeFullArgs(table, chain) + + runner.mu.Lock() + defer runner.mu.Unlock() + + out, err := runner.run(opCreateChain, fullArgs) + if err != nil { + if ee, ok := err.(utilexec.ExitError); ok { + if ee.Exited() && ee.ExitStatus() == 1 { + return true, nil + } + } + return false, fmt.Errorf("error creating chain %q: %v: %s", chain, err, out) + } + return false, nil +} + +// FlushChain is part of Interface. +func (runner *runner) FlushChain(table Table, chain Chain) error { + fullArgs := makeFullArgs(table, chain) + + runner.mu.Lock() + defer runner.mu.Unlock() + + out, err := runner.run(opFlushChain, fullArgs) + if err != nil { + return fmt.Errorf("error flushing chain %q: %v: %s", chain, err, out) + } + return nil +} + +// DeleteChain is part of Interface. +func (runner *runner) DeleteChain(table Table, chain Chain) error { + fullArgs := makeFullArgs(table, chain) + + runner.mu.Lock() + defer runner.mu.Unlock() + + // TODO: we could call iptables -S first, ignore the output and check for non-zero return (more like DeleteRule) + out, err := runner.run(opDeleteChain, fullArgs) + if err != nil { + return fmt.Errorf("error deleting chain %q: %v: %s", chain, err, out) + } + return nil +} + +// EnsureRule is part of Interface. +func (runner *runner) EnsureRule(position RulePosition, table Table, chain Chain, args ...string) (bool, error) { + fullArgs := makeFullArgs(table, chain, args...) + + runner.mu.Lock() + defer runner.mu.Unlock() + + exists, err := runner.checkRule(table, chain, args...) + if err != nil { + return false, err + } + if exists { + return true, nil + } + out, err := runner.run(operation(position), fullArgs) + if err != nil { + return false, fmt.Errorf("error appending rule: %v: %s", err, out) + } + return false, nil +} + +// DeleteRule is part of Interface. 
+func (runner *runner) DeleteRule(table Table, chain Chain, args ...string) error { + fullArgs := makeFullArgs(table, chain, args...) + + runner.mu.Lock() + defer runner.mu.Unlock() + + exists, err := runner.checkRule(table, chain, args...) + if err != nil { + return err + } + if !exists { + return nil + } + out, err := runner.run(opDeleteRule, fullArgs) + if err != nil { + return fmt.Errorf("error deleting rule: %v: %s", err, out) + } + return nil +} + +func (runner *runner) IsIpv6() bool { + return runner.protocol == ProtocolIpv6 +} + +// Save is part of Interface. +func (runner *runner) Save(table Table) ([]byte, error) { + runner.mu.Lock() + defer runner.mu.Unlock() + + // run and return + args := []string{"-t", string(table)} + glog.V(4).Infof("running iptables-save %v", args) + return runner.exec.Command(cmdIPTablesSave, args...).CombinedOutput() +} + +// SaveAll is part of Interface. +func (runner *runner) SaveAll() ([]byte, error) { + runner.mu.Lock() + defer runner.mu.Unlock() + + // run and return + glog.V(4).Infof("running iptables-save") + return runner.exec.Command(cmdIPTablesSave, []string{}...).CombinedOutput() +} + +// Restore is part of Interface. +func (runner *runner) Restore(table Table, data []byte, flush FlushFlag, counters RestoreCountersFlag) error { + // setup args + args := []string{"-T", string(table)} + return runner.restoreInternal(args, data, flush, counters) +} + +// RestoreAll is part of Interface. 
+func (runner *runner) RestoreAll(data []byte, flush FlushFlag, counters RestoreCountersFlag) error { + // setup args + args := make([]string, 0) + return runner.restoreInternal(args, data, flush, counters) +} + +// restoreInternal is the shared part of Restore/RestoreAll +func (runner *runner) restoreInternal(args []string, data []byte, flush FlushFlag, counters RestoreCountersFlag) error { + runner.mu.Lock() + defer runner.mu.Unlock() + + if !flush { + args = append(args, "--noflush") + } + if counters { + args = append(args, "--counters") + } + + // run the command and return the output or an error including the output and error + glog.V(4).Infof("running iptables-restore %v", args) + cmd := runner.exec.Command(cmdIPTablesRestore, args...) + cmd.SetStdin(bytes.NewBuffer(data)) + b, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%v (%s)", err, b) + } + return nil +} + +func (runner *runner) iptablesCommand() string { + if runner.IsIpv6() { + return cmdIp6tables + } else { + return cmdIPTables + } +} + +func (runner *runner) run(op operation, args []string) ([]byte, error) { + iptablesCmd := runner.iptablesCommand() + + fullArgs := append(runner.waitFlag, string(op)) + fullArgs = append(fullArgs, args...) + glog.V(4).Infof("running iptables %s %v", string(op), args) + return runner.exec.Command(iptablesCmd, fullArgs...).CombinedOutput() + // Don't log err here - callers might not think it is an error. +} + +// Returns (bool, nil) if it was able to check the existence of the rule, or +// (, error) if the process of checking failed. +func (runner *runner) checkRule(table Table, chain Chain, args ...string) (bool, error) { + if runner.hasCheck { + return runner.checkRuleUsingCheck(makeFullArgs(table, chain, args...)) + } else { + return runner.checkRuleWithoutCheck(table, chain, args...) 
+ } +} + +var hexnumRE = regexp.MustCompile("0x0+([0-9])") + +func trimhex(s string) string { + return hexnumRE.ReplaceAllString(s, "0x$1") +} + +// Executes the rule check without using the "-C" flag, instead parsing iptables-save. +// Present for compatibility with <1.4.11 versions of iptables. This is full +// of hack and half-measures. We should nix this ASAP. +func (runner *runner) checkRuleWithoutCheck(table Table, chain Chain, args ...string) (bool, error) { + glog.V(1).Infof("running iptables-save -t %s", string(table)) + out, err := runner.exec.Command(cmdIPTablesSave, "-t", string(table)).CombinedOutput() + if err != nil { + return false, fmt.Errorf("error checking rule: %v", err) + } + + // Sadly, iptables has inconsistent quoting rules for comments. Just remove all quotes. + // Also, quoted multi-word comments (which are counted as a single arg) + // will be unpacked into multiple args, + // in order to compare against iptables-save output (which will be split at whitespace boundary) + // e.g. a single arg('"this must be before the NodePort rules"') will be unquoted and unpacked into 7 args. + var argsCopy []string + for i := range args { + tmpField := strings.Trim(args[i], "\"") + tmpField = trimhex(tmpField) + argsCopy = append(argsCopy, strings.Fields(tmpField)...) + } + argset := sets.NewString(argsCopy...) + + for _, line := range strings.Split(string(out), "\n") { + var fields = strings.Fields(line) + + // Check that this is a rule for the correct chain, and that it has + // the correct number of argument (+2 for "-A ") + if !strings.HasPrefix(line, fmt.Sprintf("-A %s", string(chain))) || len(fields) != len(argsCopy)+2 { + continue + } + + // Sadly, iptables has inconsistent quoting rules for comments. + // Just remove all quotes. + for i := range fields { + fields[i] = strings.Trim(fields[i], "\"") + fields[i] = trimhex(fields[i]) + } + + // TODO: This misses reorderings e.g. "-x foo ! -y bar" will match "! 
-x foo -y bar" + if sets.NewString(fields...).IsSuperset(argset) { + return true, nil + } + glog.V(5).Infof("DBG: fields is not a superset of args: fields=%v args=%v", fields, args) + } + + return false, nil +} + +// Executes the rule check using the "-C" flag +func (runner *runner) checkRuleUsingCheck(args []string) (bool, error) { + out, err := runner.run(opCheckRule, args) + if err == nil { + return true, nil + } + if ee, ok := err.(utilexec.ExitError); ok { + // iptables uses exit(1) to indicate a failure of the operation, + // as compared to a malformed commandline, for example. + if ee.Exited() && ee.ExitStatus() == 1 { + return false, nil + } + } + return false, fmt.Errorf("error checking rule: %v: %s", err, out) +} + +type operation string + +const ( + opCreateChain operation = "-N" + opFlushChain operation = "-F" + opDeleteChain operation = "-X" + opAppendRule operation = "-A" + opCheckRule operation = "-C" + opDeleteRule operation = "-D" +) + +func makeFullArgs(table Table, chain Chain, args ...string) []string { + return append([]string{string(chain), "-t", string(table)}, args...) 
+} + +// Checks if iptables has the "-C" flag +func getIPTablesHasCheckCommand(vstring string) bool { + minVersion, err := utilversion.ParseGeneric(MinCheckVersion) + if err != nil { + glog.Errorf("MinCheckVersion (%s) is not a valid version string: %v", MinCheckVersion, err) + return true + } + version, err := utilversion.ParseGeneric(vstring) + if err != nil { + glog.Errorf("vstring (%s) is not a valid version string: %v", vstring, err) + return true + } + return version.AtLeast(minVersion) +} + +// Checks if iptables version has a "wait" flag +func getIPTablesWaitFlag(vstring string) []string { + version, err := utilversion.ParseGeneric(vstring) + if err != nil { + glog.Errorf("vstring (%s) is not a valid version string: %v", vstring, err) + return nil + } + + minVersion, err := utilversion.ParseGeneric(MinWaitVersion) + if err != nil { + glog.Errorf("MinWaitVersion (%s) is not a valid version string: %v", MinWaitVersion, err) + return nil + } + if version.LessThan(minVersion) { + return nil + } + + minVersion, err = utilversion.ParseGeneric(MinWait2Version) + if err != nil { + glog.Errorf("MinWait2Version (%s) is not a valid version string: %v", MinWait2Version, err) + return nil + } + if version.LessThan(minVersion) { + return []string{"-w"} + } else { + return []string{"-w2"} + } +} + +// getIPTablesVersionString runs "iptables --version" to get the version string +// in the form "X.X.X" +func getIPTablesVersionString(exec utilexec.Interface) (string, error) { + // this doesn't access mutable state so we don't need to use the interface / runner + bytes, err := exec.Command(cmdIPTables, "--version").CombinedOutput() + if err != nil { + return "", err + } + versionMatcher := regexp.MustCompile("v([0-9]+(\\.[0-9]+)+)") + match := versionMatcher.FindStringSubmatch(string(bytes)) + if match == nil { + return "", fmt.Errorf("no iptables version found in string: %s", bytes) + } + return match[1], nil +} + +// goroutine to listen for D-Bus signals +func (runner 
*runner) dbusSignalHandler(bus utildbus.Connection) { + firewalld := bus.Object(firewalldName, firewalldPath) + + for s := range runner.signal { + if s == nil { + // Unregister + bus.Signal(runner.signal) + return + } + + switch s.Name { + case "org.freedesktop.DBus.NameOwnerChanged": + name := s.Body[0].(string) + new_owner := s.Body[2].(string) + + if name != firewalldName || len(new_owner) == 0 { + continue + } + + // FirewallD startup (specifically the part where it deletes + // all existing iptables rules) may not yet be complete when + // we get this signal, so make a dummy request to it to + // synchronize. + firewalld.Call(firewalldInterface+".getDefaultZone", 0) + + runner.reload() + case firewalldInterface + ".Reloaded": + runner.reload() + } + } +} + +// AddReloadFunc is part of Interface +func (runner *runner) AddReloadFunc(reloadFunc func()) { + runner.reloadFuncs = append(runner.reloadFuncs, reloadFunc) +} + +// runs all reload funcs to re-sync iptables rules +func (runner *runner) reload() { + glog.V(1).Infof("reloading iptables rules") + + for _, f := range runner.reloadFuncs { + f() + } +} + +// IsNotFoundError returns true if the error indicates "not found". It parses +// the error string looking for known values, which is imperfect but works in +// practice. +func IsNotFoundError(err error) bool { + es := err.Error() + if strings.Contains(es, "No such file or directory") { + return true + } + if strings.Contains(es, "No chain/target/match by that name") { + return true + } + return false +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/iptables/save_restore.go b/vendor/k8s.io/kubernetes/pkg/util/iptables/save_restore.go new file mode 100644 index 00000000..435a54be --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/iptables/save_restore.go @@ -0,0 +1,108 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package iptables + +import ( + "fmt" + "strings" +) + +// MakeChainLine return an iptables-save/restore formatted chain line given a Chain +func MakeChainLine(chain Chain) string { + return fmt.Sprintf(":%s - [0:0]", chain) +} + +// GetChainLines parses a table's iptables-save data to find chains in the table. +// It returns a map of iptables.Chain to string where the string is the chain line from the save (with counters etc). +func GetChainLines(table Table, save []byte) map[Chain]string { + chainsMap := make(map[Chain]string) + tablePrefix := "*" + string(table) + readIndex := 0 + // find beginning of table + for readIndex < len(save) { + line, n := ReadLine(readIndex, save) + readIndex = n + if strings.HasPrefix(line, tablePrefix) { + break + } + } + // parse table lines + for readIndex < len(save) { + line, n := ReadLine(readIndex, save) + readIndex = n + if len(line) == 0 { + continue + } + if strings.HasPrefix(line, "COMMIT") || strings.HasPrefix(line, "*") { + break + } else if strings.HasPrefix(line, "#") { + continue + } else if strings.HasPrefix(line, ":") && len(line) > 1 { + chain := Chain(strings.SplitN(line[1:], " ", 2)[0]) + chainsMap[chain] = line + } + } + return chainsMap +} + +func ReadLine(readIndex int, byteArray []byte) (string, int) { + currentReadIndex := readIndex + + // consume left spaces + for currentReadIndex < len(byteArray) { + if byteArray[currentReadIndex] == ' ' { + currentReadIndex++ + } else { + break + } + } + + // leftTrimIndex stores the left index of the line after the line is left-trimmed + leftTrimIndex := currentReadIndex 
+ + // rightTrimIndex stores the right index of the line after the line is right-trimmed + // it is set to -1 since the correct value has not yet been determined. + rightTrimIndex := -1 + + for ; currentReadIndex < len(byteArray); currentReadIndex++ { + if byteArray[currentReadIndex] == ' ' { + // set rightTrimIndex + if rightTrimIndex == -1 { + rightTrimIndex = currentReadIndex + } + } else if (byteArray[currentReadIndex] == '\n') || (currentReadIndex == (len(byteArray) - 1)) { + // end of line or byte buffer is reached + if currentReadIndex <= leftTrimIndex { + return "", currentReadIndex + 1 + } + // set the rightTrimIndex + if rightTrimIndex == -1 { + rightTrimIndex = currentReadIndex + if currentReadIndex == (len(byteArray)-1) && (byteArray[currentReadIndex] != '\n') { + // ensure that the last character is part of the returned string, + // unless the last character is '\n' + rightTrimIndex = currentReadIndex + 1 + } + } + return string(byteArray[leftTrimIndex:rightTrimIndex]), currentReadIndex + 1 + } else { + // unset rightTrimIndex + rightTrimIndex = -1 + } + } + return "", currentReadIndex +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/net/sets/doc.go b/vendor/k8s.io/kubernetes/pkg/util/net/sets/doc.go new file mode 100644 index 00000000..8414f74a --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/net/sets/doc.go @@ -0,0 +1,28 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// This package contains hand-coded set implementations that should be similar +// to the autogenerated ones in pkg/util/sets. +// We can't simply use net.IPNet as a map-key in Go (because it contains a +// []byte). +// We could use the same workaround we use here (a string representation as the +// key) to autogenerate sets. If we do that, or decide on an alternate +// approach, we should replace the implementations in this package with the +// autogenerated versions. +// It is expected that callers will alias this import as "netsets" i.e. import +// netsets "k8s.io/kubernetes/pkg/util/net/sets" + +package sets diff --git a/vendor/k8s.io/kubernetes/pkg/util/net/sets/ipnet.go b/vendor/k8s.io/kubernetes/pkg/util/net/sets/ipnet.go new file mode 100644 index 00000000..5b6fe933 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/net/sets/ipnet.go @@ -0,0 +1,119 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sets + +import ( + "net" + "strings" +) + +type IPNet map[string]*net.IPNet + +func ParseIPNets(specs ...string) (IPNet, error) { + ipnetset := make(IPNet) + for _, spec := range specs { + spec = strings.TrimSpace(spec) + _, ipnet, err := net.ParseCIDR(spec) + if err != nil { + return nil, err + } + k := ipnet.String() // In case of normalization + ipnetset[k] = ipnet + } + return ipnetset, nil +} + +// Insert adds items to the set. 
+func (s IPNet) Insert(items ...*net.IPNet) { + for _, item := range items { + s[item.String()] = item + } +} + +// Delete removes all items from the set. +func (s IPNet) Delete(items ...*net.IPNet) { + for _, item := range items { + delete(s, item.String()) + } +} + +// Has returns true if and only if item is contained in the set. +func (s IPNet) Has(item *net.IPNet) bool { + _, contained := s[item.String()] + return contained +} + +// HasAll returns true if and only if all items are contained in the set. +func (s IPNet) HasAll(items ...*net.IPNet) bool { + for _, item := range items { + if !s.Has(item) { + return false + } + } + return true +} + +// Difference returns a set of objects that are not in s2 +// For example: +// s1 = {a1, a2, a3} +// s2 = {a1, a2, a4, a5} +// s1.Difference(s2) = {a3} +// s2.Difference(s1) = {a4, a5} +func (s IPNet) Difference(s2 IPNet) IPNet { + result := make(IPNet) + for k, i := range s { + _, found := s2[k] + if found { + continue + } + result[k] = i + } + return result +} + +// StringSlice returns a []string with the String representation of each element in the set. +// Order is undefined. +func (s IPNet) StringSlice() []string { + a := make([]string, 0, len(s)) + for k := range s { + a = append(a, k) + } + return a +} + +// IsSuperset returns true if and only if s1 is a superset of s2. +func (s1 IPNet) IsSuperset(s2 IPNet) bool { + for k := range s2 { + _, found := s1[k] + if !found { + return false + } + } + return true +} + +// Equal returns true if and only if s1 is equal (as a set) to s2. +// Two sets are equal if their membership is identical. +// (In practice, this means same elements, order doesn't matter) +func (s1 IPNet) Equal(s2 IPNet) bool { + return len(s1) == len(s2) && s1.IsSuperset(s2) +} + +// Len returns the size of the set. 
+func (s IPNet) Len() int { + return len(s) +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/sysctl/sysctl.go b/vendor/k8s.io/kubernetes/pkg/util/sysctl/sysctl.go new file mode 100644 index 00000000..e58b99d3 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/sysctl/sysctl.go @@ -0,0 +1,73 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sysctl + +import ( + "io/ioutil" + "path" + "strconv" + "strings" +) + +const ( + sysctlBase = "/proc/sys" + VmOvercommitMemory = "vm/overcommit_memory" + VmPanicOnOOM = "vm/panic_on_oom" + KernelPanic = "kernel/panic" + KernelPanicOnOops = "kernel/panic_on_oops" + + VmOvercommitMemoryAlways = 1 // kernel performs no memory over-commit handling + VmPanicOnOOMInvokeOOMKiller = 0 // kernel calls the oom_killer function when OOM occurs + + KernelPanicOnOopsAlways = 1 // kernel panics on kernel oops + KernelPanicRebootTimeout = 10 // seconds after a panic for the kernel to reboot +) + +// An injectable interface for running sysctl commands. 
+type Interface interface { + // GetSysctl returns the value for the specified sysctl setting + GetSysctl(sysctl string) (int, error) + // SetSysctl modifies the specified sysctl flag to the new value + SetSysctl(sysctl string, newVal int) error +} + +// New returns a new Interface for accessing sysctl +func New() Interface { + return &procSysctl{} +} + +// procSysctl implements Interface by reading and writing files under /proc/sys +type procSysctl struct { +} + +// GetSysctl returns the value for the specified sysctl setting +func (_ *procSysctl) GetSysctl(sysctl string) (int, error) { + data, err := ioutil.ReadFile(path.Join(sysctlBase, sysctl)) + if err != nil { + return -1, err + } + val, err := strconv.Atoi(strings.Trim(string(data), " \n")) + if err != nil { + return -1, err + } + return val, nil +} + +// SetSysctl modifies the specified sysctl flag to the new value +func (_ *procSysctl) SetSysctl(sysctl string, newVal int) error { + return ioutil.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640) +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/version/doc.go b/vendor/k8s.io/kubernetes/pkg/util/version/doc.go new file mode 100644 index 00000000..ebe43152 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/version/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// Package version provides utilities for version number comparisons +package version // import "k8s.io/kubernetes/pkg/util/version" diff --git a/vendor/k8s.io/kubernetes/pkg/util/version/version.go b/vendor/k8s.io/kubernetes/pkg/util/version/version.go new file mode 100644 index 00000000..327f2e67 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/version/version.go @@ -0,0 +1,236 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package version + +import ( + "bytes" + "fmt" + "regexp" + "strconv" + "strings" +) + +// Version is an opqaue representation of a version number +type Version struct { + components []uint + semver bool + preRelease string + buildMetadata string +} + +var ( + // versionMatchRE splits a version string into numeric and "extra" parts + versionMatchRE = regexp.MustCompile(`^\s*v?([0-9]+(?:\.[0-9]+)*)(.*)*$`) + // extraMatchRE splits the "extra" part of versionMatchRE into semver pre-release and build metadata; it does not validate the "no leading zeroes" constraint for pre-release + extraMatchRE = regexp.MustCompile(`^(?:-([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?\s*$`) +) + +func parse(str string, semver bool) (*Version, error) { + parts := versionMatchRE.FindStringSubmatch(str) + if parts == nil { + return nil, fmt.Errorf("could not parse %q as version", str) + } + numbers, extra := parts[1], parts[2] + + components := strings.Split(numbers, ".") + if (semver && 
len(components) != 3) || (!semver && len(components) < 2) { + return nil, fmt.Errorf("illegal version string %q", str) + } + + v := &Version{ + components: make([]uint, len(components)), + semver: semver, + } + for i, comp := range components { + if (i == 0 || semver) && strings.HasPrefix(comp, "0") && comp != "0" { + return nil, fmt.Errorf("illegal zero-prefixed version component %q in %q", comp, str) + } + num, err := strconv.ParseUint(comp, 10, 0) + if err != nil { + return nil, fmt.Errorf("illegal non-numeric version component %q in %q: %v", comp, str, err) + } + v.components[i] = uint(num) + } + + if semver && extra != "" { + extraParts := extraMatchRE.FindStringSubmatch(extra) + if extraParts == nil { + return nil, fmt.Errorf("could not parse pre-release/metadata (%s) in version %q", extra, str) + } + v.preRelease, v.buildMetadata = extraParts[1], extraParts[2] + + for _, comp := range strings.Split(v.preRelease, ".") { + if _, err := strconv.ParseUint(comp, 10, 0); err == nil { + if strings.HasPrefix(comp, "0") && comp != "0" { + return nil, fmt.Errorf("illegal zero-prefixed version component %q in %q", comp, str) + } + } + } + } + + return v, nil +} + +// ParseGeneric parses a "generic" version string. The version string must consist of two +// or more dot-separated numeric fields (the first of which can't have leading zeroes), +// followed by arbitrary uninterpreted data (which need not be separated from the final +// numeric field by punctuation). For convenience, leading and trailing whitespace is +// ignored, and the version can be preceded by the letter "v". See also ParseSemantic. 
+func ParseGeneric(str string) (*Version, error) { + return parse(str, false) +} + +// MustParseGeneric is like ParseGeneric except that it panics on error +func MustParseGeneric(str string) *Version { + v, err := ParseGeneric(str) + if err != nil { + panic(err) + } + return v +} + +// ParseSemantic parses a version string that exactly obeys the syntax and semantics of +// the "Semantic Versioning" specification (http://semver.org/) (although it ignores +// leading and trailing whitespace, and allows the version to be preceded by "v"). For +// version strings that are not guaranteed to obey the Semantic Versioning syntax, use +// ParseGeneric. +func ParseSemantic(str string) (*Version, error) { + return parse(str, true) +} + +// MustParseSemantic is like ParseSemantic except that it panics on error +func MustParseSemantic(str string) *Version { + v, err := ParseSemantic(str) + if err != nil { + panic(err) + } + return v +} + +// BuildMetadata returns the build metadata, if v is a Semantic Version, or "" +func (v *Version) BuildMetadata() string { + return v.buildMetadata +} + +// String converts a Version back to a string; note that for versions parsed with +// ParseGeneric, this will not include the trailing uninterpreted portion of the version +// number. 
+func (v *Version) String() string { + var buffer bytes.Buffer + + for i, comp := range v.components { + if i > 0 { + buffer.WriteString(".") + } + buffer.WriteString(fmt.Sprintf("%d", comp)) + } + if v.preRelease != "" { + buffer.WriteString("-") + buffer.WriteString(v.preRelease) + } + if v.buildMetadata != "" { + buffer.WriteString("+") + buffer.WriteString(v.buildMetadata) + } + + return buffer.String() +} + +// compareInternal returns -1 if v is less than other, 1 if it is greater than other, or 0 +// if they are equal +func (v *Version) compareInternal(other *Version) int { + for i := range v.components { + switch { + case i >= len(other.components): + if v.components[i] != 0 { + return 1 + } + case other.components[i] < v.components[i]: + return 1 + case other.components[i] > v.components[i]: + return -1 + } + } + + if !v.semver || !other.semver { + return 0 + } + + switch { + case v.preRelease == "" && other.preRelease != "": + return 1 + case v.preRelease != "" && other.preRelease == "": + return -1 + case v.preRelease == other.preRelease: // includes case where both are "" + return 0 + } + + vPR := strings.Split(v.preRelease, ".") + oPR := strings.Split(other.preRelease, ".") + for i := range vPR { + if i >= len(oPR) { + return 1 + } + vNum, err := strconv.ParseUint(vPR[i], 10, 0) + if err == nil { + oNum, err := strconv.ParseUint(oPR[i], 10, 0) + if err == nil { + switch { + case oNum < vNum: + return 1 + case oNum > vNum: + return -1 + default: + continue + } + } + } + if oPR[i] < vPR[i] { + return 1 + } else if oPR[i] > vPR[i] { + return -1 + } + } + + return 0 +} + +// AtLeast tests if a version is at least equal to a given minimum version. If both +// Versions are Semantic Versions, this will use the Semantic Version comparison +// algorithm. Otherwise, it will compare only the numeric components, with non-present +// components being considered "0" (ie, "1.4" is equal to "1.4.0"). 
+func (v *Version) AtLeast(min *Version) bool { + return v.compareInternal(min) != -1 +} + +// LessThan tests if a version is less than a given version. (It is exactly the opposite +// of AtLeast, for situations where asking "is v too old?" makes more sense than asking +// "is v new enough?".) +func (v *Version) LessThan(other *Version) bool { + return v.compareInternal(other) == -1 +} + +// Compare compares v against a version string (which will be parsed as either Semantic +// or non-Semantic depending on v). On success it returns -1 if v is less than other, 1 if +// it is greater than other, or 0 if they are equal. +func (v *Version) Compare(other string) (int, error) { + ov, err := parse(other, v.semver) + if err != nil { + return 0, err + } + return v.compareInternal(ov), nil +}