Switch to github.com/golang/dep for vendoring

Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
This commit is contained in:
Mrunal Patel 2017-01-31 16:45:59 -08:00
parent d6ab91be27
commit 8e5b17cf13
15431 changed files with 3971413 additions and 8881 deletions

101
vendor/k8s.io/kubernetes/pkg/controller/node/BUILD generated vendored Normal file
View file

@ -0,0 +1,101 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"cidr_allocator.go",
"cidr_set.go",
"controller_utils.go",
"doc.go",
"metrics.go",
"nodecontroller.go",
"rate_limited_queue.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/clientset:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/core/v1:go_default_library",
"//pkg/client/record:go_default_library",
"//pkg/cloudprovider:go_default_library",
"//pkg/controller/informers:go_default_library",
"//pkg/fields:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/util/flowcontrol:go_default_library",
"//pkg/util/metrics:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/system:go_default_library",
"//pkg/util/version:go_default_library",
"//vendor:github.com/golang/glog",
"//vendor:github.com/prometheus/client_golang/prometheus",
"//vendor:k8s.io/apimachinery/pkg/api/errors",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/labels",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/errors",
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
],
)
go_test(
name = "go_default_test",
srcs = [
"cidr_allocator_test.go",
"cidr_set_test.go",
"nodecontroller_test.go",
"rate_limited_queue_test.go",
],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/clientset:go_default_library",
"//pkg/client/clientset_generated/clientset/fake:go_default_library",
"//pkg/client/testing/core:go_default_library",
"//pkg/cloudprovider:go_default_library",
"//pkg/cloudprovider/providers/fake:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/informers:go_default_library",
"//pkg/controller/node/testutil:go_default_library",
"//pkg/util/flowcontrol:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor:github.com/golang/glog",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/diff",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/controller/node/testutil:all-srcs",
],
tags = ["automanaged"],
)

5
vendor/k8s.io/kubernetes/pkg/controller/node/OWNERS generated vendored Executable file
View file

@ -0,0 +1,5 @@
reviewers:
- gmarek
- smarterclayton
- ingvagabund
- aveshagarwal

View file

@ -0,0 +1,267 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"errors"
"fmt"
"net"
"sync"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/client/record"
"github.com/golang/glog"
)
// TODO: figure out the good setting for those constants.
const (
// controls how many NodeSpec updates NC can process concurrently.
cidrUpdateWorkers = 10
cidrUpdateQueueSize = 5000
// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update.
podCIDRUpdateRetry = 5
)
var errCIDRRangeNoCIDRsRemaining = errors.New("CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range")
type nodeAndCIDR struct {
cidr *net.IPNet
nodeName string
}
// CIDRAllocator is an interface implemented by things that know how to allocate/occupy/recycle CIDR for nodes.
type CIDRAllocator interface {
AllocateOrOccupyCIDR(node *v1.Node) error
ReleaseCIDR(node *v1.Node) error
}
type rangeAllocator struct {
client clientset.Interface
cidrs *cidrSet
clusterCIDR *net.IPNet
maxCIDRs int
// Channel that is used to pass updating Nodes with assigned CIDRs to the background
// This increases a throughput of CIDR assignment by not blocking on long operations.
nodeCIDRUpdateChannel chan nodeAndCIDR
recorder record.EventRecorder
// Keep a set of nodes that are currectly being processed to avoid races in CIDR allocation
sync.Mutex
nodesInProcessing sets.String
}
// NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node
// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
// Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing.
func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(v1.EventSource{Component: "cidrAllocator"})
eventBroadcaster.StartLogging(glog.Infof)
ra := &rangeAllocator{
client: client,
cidrs: newCIDRSet(clusterCIDR, subNetMaskSize),
clusterCIDR: clusterCIDR,
nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
recorder: recorder,
nodesInProcessing: sets.NewString(),
}
if serviceCIDR != nil {
ra.filterOutServiceRange(serviceCIDR)
} else {
glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.")
}
if nodeList != nil {
for _, node := range nodeList.Items {
if node.Spec.PodCIDR == "" {
glog.Infof("Node %v has no CIDR, ignoring", node.Name)
continue
} else {
glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR)
}
if err := ra.occupyCIDR(&node); err != nil {
// This will happen if:
// 1. We find garbage in the podCIDR field. Retrying is useless.
// 2. CIDR out of range: This means a node CIDR has changed.
// This error will keep crashing controller-manager.
return nil, err
}
}
}
for i := 0; i < cidrUpdateWorkers; i++ {
go func(stopChan <-chan struct{}) {
for {
select {
case workItem, ok := <-ra.nodeCIDRUpdateChannel:
if !ok {
glog.Warning("NodeCIDRUpdateChannel read returned false.")
return
}
ra.updateCIDRAllocation(workItem)
case <-stopChan:
return
}
}
}(wait.NeverStop)
}
return ra, nil
}
func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool {
r.Lock()
defer r.Unlock()
if r.nodesInProcessing.Has(nodeName) {
return false
}
r.nodesInProcessing.Insert(nodeName)
return true
}
func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) {
r.Lock()
defer r.Unlock()
r.nodesInProcessing.Delete(nodeName)
}
func (r *rangeAllocator) occupyCIDR(node *v1.Node) error {
defer r.removeNodeFromProcessing(node.Name)
if node.Spec.PodCIDR == "" {
return nil
}
_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
if err != nil {
return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR)
}
if err := r.cidrs.occupy(podCIDR); err != nil {
return fmt.Errorf("failed to mark cidr as occupied: %v", err)
}
return nil
}
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid CIDR
// if it doesn't currently have one or mark the CIDR as used if the node already have one.
// WARNING: If you're adding any return calls or defer any more work from this function
// you have to handle correctly nodesInProcessing.
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
if node == nil {
return nil
}
if !r.insertNodeToProcessing(node.Name) {
glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
return nil
}
if node.Spec.PodCIDR != "" {
return r.occupyCIDR(node)
}
podCIDR, err := r.cidrs.allocateNext()
if err != nil {
r.removeNodeFromProcessing(node.Name)
recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: %v", err)
}
glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR)
r.nodeCIDRUpdateChannel <- nodeAndCIDR{
nodeName: node.Name,
cidr: podCIDR,
}
return nil
}
// ReleaseCIDR releases the CIDR of the removed node
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
if node == nil || node.Spec.PodCIDR == "" {
return nil
}
_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
if err != nil {
return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err)
}
glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR)
if err = r.cidrs.release(podCIDR); err != nil {
return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err)
}
return err
}
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used,
// so that they won't be assignable.
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster CIDR. It is the case if either
// clusterCIDR contains serviceCIDR with clusterCIDR's Mask applied (this means that clusterCIDR contains serviceCIDR)
// or vice versa (which means that serviceCIDR contains clusterCIDR).
if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) {
return
}
if err := r.cidrs.occupy(serviceCIDR); err != nil {
glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err)
}
}
// Assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
var err error
var node *v1.Node
defer r.removeNodeFromProcessing(data.nodeName)
for rep := 0; rep < podCIDRUpdateRetry; rep++ {
// TODO: change it to using PATCH instead of full Node updates.
node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{})
if err != nil {
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
continue
}
if node.Spec.PodCIDR != "" {
glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR)
if node.Spec.PodCIDR != data.cidr.String() {
if err := r.cidrs.release(data.cidr); err != nil {
glog.Errorf("Error when releasing CIDR %v", data.cidr.String())
}
}
return nil
}
node.Spec.PodCIDR = data.cidr.String()
if _, err := r.client.Core().Nodes().Update(node); err != nil {
glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err)
} else {
break
}
}
if err != nil {
recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leek CIDRs here. This is safer than releasing
// them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) {
glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil {
glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
}
}
}
return err
}

View file

@ -0,0 +1,397 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"net"
"testing"
"time"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
"k8s.io/kubernetes/pkg/controller/node/testutil"
)
const (
nodePollInterval = 100 * time.Millisecond
)
func waitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error {
return wait.Poll(nodePollInterval, timeout, func() (bool, error) {
if len(nodeHandler.GetUpdatedNodesCopy()) >= number {
return true, nil
}
return false, nil
})
}
func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDR string
allocatedCIDRs []string
}{
{
description: "When there's no ServiceCIDR return first CIDR in range",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
expectedAllocatedCIDR: "127.123.234.0/30",
},
{
description: "Correctly filter out ServiceCIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/26")
return clusterCIDR
}(),
subNetMaskSize: 30,
// it should return first /30 CIDR after service range
expectedAllocatedCIDR: "127.123.234.64/30",
},
{
description: "Correctly ignore already allocated CIDRs",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24")
return clusterCIDR
}(),
serviceCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/26")
return clusterCIDR
}(),
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.64/30", "127.123.234.68/30", "127.123.234.72/30", "127.123.234.80/30"},
expectedAllocatedCIDR: "127.123.234.76/30",
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDR string
allocatedCIDRs []string
}) {
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
if err = rangeAllocator.cidrs.occupy(cidr); err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
found := false
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
if updatedNode.Spec.PodCIDR == tc.expectedAllocatedCIDR {
found = true
break
}
}
if !found {
t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v",
tc.description, tc.expectedAllocatedCIDR, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}
func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
allocatedCIDRs []string
}{
{
description: "When there's no ServiceCIDR return first CIDR in range",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"},
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
allocatedCIDRs []string
}) {
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
err = rangeAllocator.cidrs.occupy(cidr)
if err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
time.Sleep(time.Second)
if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 {
t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy())
}
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
if updatedNode.Spec.PodCIDR != "" {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
}
}
if len(seenCIDRs) != 0 {
t.Errorf("%v: Seen assigned CIDRs when not expected: %v",
tc.description, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}
func TestReleaseCIDRSuccess(t *testing.T) {
testCases := []struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDRFirstRound string
expectedAllocatedCIDRSecondRound string
allocatedCIDRs []string
cidrsToRelease []string
}{
{
description: "Correctly release preallocated CIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
allocatedCIDRs: []string{"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"},
expectedAllocatedCIDRFirstRound: "",
cidrsToRelease: []string{"127.123.234.4/30"},
expectedAllocatedCIDRSecondRound: "127.123.234.4/30",
},
{
description: "Correctly recycle CIDR",
fakeNodeHandler: &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
},
},
Clientset: fake.NewSimpleClientset(),
},
clusterCIDR: func() *net.IPNet {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28")
return clusterCIDR
}(),
serviceCIDR: nil,
subNetMaskSize: 30,
expectedAllocatedCIDRFirstRound: "127.123.234.0/30",
cidrsToRelease: []string{"127.123.234.0/30"},
expectedAllocatedCIDRSecondRound: "127.123.234.0/30",
},
}
testFunc := func(tc struct {
description string
fakeNodeHandler *testutil.FakeNodeHandler
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
subNetMaskSize int
expectedAllocatedCIDRFirstRound string
expectedAllocatedCIDRSecondRound string
allocatedCIDRs []string
cidrsToRelease []string
}) {
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, tc.clusterCIDR, tc.serviceCIDR, tc.subNetMaskSize, nil)
// this is a bit of white box testing
for _, allocated := range tc.allocatedCIDRs {
_, cidr, err := net.ParseCIDR(allocated)
if err != nil {
t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err)
}
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
return
}
err = rangeAllocator.cidrs.occupy(cidr)
if err != nil {
t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err)
}
}
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
if tc.expectedAllocatedCIDRFirstRound != "" {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
} else {
if err == nil {
t.Fatalf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates here
time.Sleep(time.Second)
if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 {
t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy())
}
}
for _, cidrToRelease := range tc.cidrsToRelease {
nodeToRelease := v1.Node{
ObjectMeta: v1.ObjectMeta{
Name: "node0",
},
}
nodeToRelease.Spec.PodCIDR = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err)
}
found := false
seenCIDRs := []string{}
for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() {
seenCIDRs = append(seenCIDRs, updatedNode.Spec.PodCIDR)
if updatedNode.Spec.PodCIDR == tc.expectedAllocatedCIDRSecondRound {
found = true
break
}
}
if !found {
t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v",
tc.description, tc.expectedAllocatedCIDRSecondRound, seenCIDRs)
}
}
for _, tc := range testCases {
testFunc(tc)
}
}

View file

@ -0,0 +1,150 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"encoding/binary"
"fmt"
"math/big"
"net"
"sync"
)
type cidrSet struct {
sync.Mutex
clusterCIDR *net.IPNet
clusterIP net.IP
clusterMaskSize int
maxCIDRs int
nextCandidate int
used big.Int
subNetMaskSize int
}
func newCIDRSet(clusterCIDR *net.IPNet, subNetMaskSize int) *cidrSet {
clusterMask := clusterCIDR.Mask
clusterMaskSize, _ := clusterMask.Size()
maxCIDRs := 1 << uint32(subNetMaskSize-clusterMaskSize)
return &cidrSet{
clusterCIDR: clusterCIDR,
clusterIP: clusterCIDR.IP.To4(),
clusterMaskSize: clusterMaskSize,
maxCIDRs: maxCIDRs,
subNetMaskSize: subNetMaskSize,
}
}
func (s *cidrSet) allocateNext() (*net.IPNet, error) {
s.Lock()
defer s.Unlock()
nextUnused := -1
for i := 0; i < s.maxCIDRs; i++ {
candidate := (i + s.nextCandidate) % s.maxCIDRs
if s.used.Bit(candidate) == 0 {
nextUnused = candidate
break
}
}
if nextUnused == -1 {
return nil, errCIDRRangeNoCIDRsRemaining
}
s.nextCandidate = (nextUnused + 1) % s.maxCIDRs
s.used.SetBit(&s.used, nextUnused, 1)
j := uint32(nextUnused) << uint32(32-s.subNetMaskSize)
ipInt := (binary.BigEndian.Uint32(s.clusterIP)) | j
ip := make([]byte, 4)
binary.BigEndian.PutUint32(ip, ipInt)
return &net.IPNet{
IP: ip,
Mask: net.CIDRMask(s.subNetMaskSize, 32),
}, nil
}
func (s *cidrSet) getBeginingAndEndIndices(cidr *net.IPNet) (begin, end int, err error) {
begin, end = 0, s.maxCIDRs
cidrMask := cidr.Mask
maskSize, _ := cidrMask.Size()
if !s.clusterCIDR.Contains(cidr.IP.Mask(s.clusterCIDR.Mask)) && !cidr.Contains(s.clusterCIDR.IP.Mask(cidr.Mask)) {
return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.clusterCIDR)
}
if s.clusterMaskSize < maskSize {
subNetMask := net.CIDRMask(s.subNetMaskSize, 32)
begin, err = s.getIndexForCIDR(&net.IPNet{
IP: cidr.IP.To4().Mask(subNetMask),
Mask: subNetMask,
})
if err != nil {
return -1, -1, err
}
ip := make([]byte, 4)
ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask))
binary.BigEndian.PutUint32(ip, ipInt)
end, err = s.getIndexForCIDR(&net.IPNet{
IP: net.IP(ip).To4().Mask(subNetMask),
Mask: subNetMask,
})
if err != nil {
return -1, -1, err
}
}
return begin, end, nil
}
func (s *cidrSet) release(cidr *net.IPNet) error {
begin, end, err := s.getBeginingAndEndIndices(cidr)
if err != nil {
return err
}
s.Lock()
defer s.Unlock()
for i := begin; i <= end; i++ {
s.used.SetBit(&s.used, i, 0)
}
return nil
}
func (s *cidrSet) occupy(cidr *net.IPNet) (err error) {
begin, end, err := s.getBeginingAndEndIndices(cidr)
if err != nil {
return err
}
s.Lock()
defer s.Unlock()
for i := begin; i <= end; i++ {
s.used.SetBit(&s.used, i, 1)
}
return nil
}
func (s *cidrSet) getIndexForCIDR(cidr *net.IPNet) (int, error) {
cidrIndex := (binary.BigEndian.Uint32(s.clusterIP) ^ binary.BigEndian.Uint32(cidr.IP.To4())) >> uint32(32-s.subNetMaskSize)
if cidrIndex >= uint32(s.maxCIDRs) {
return 0, fmt.Errorf("CIDR: %v is out of the range of CIDR allocator", cidr)
}
return int(cidrIndex), nil
}

View file

@ -0,0 +1,336 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"math/big"
"net"
"reflect"
"testing"
"github.com/golang/glog"
)
func TestCIDRSetFullyAllocated(t *testing.T) {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/30")
a := newCIDRSet(clusterCIDR, 30)
p, err := a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if p.String() != "127.123.234.0/30" {
t.Fatalf("unexpected allocated cidr: %s", p.String())
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
a.release(p)
p, err = a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if p.String() != "127.123.234.0/30" {
t.Fatalf("unexpected allocated cidr: %s", p.String())
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
}
func TestCIDRSet_RandomishAllocation(t *testing.T) {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/16")
a := newCIDRSet(clusterCIDR, 24)
// allocate all the CIDRs
var err error
cidrs := make([]*net.IPNet, 256)
for i := 0; i < 256; i++ {
cidrs[i], err = a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
// release them all
for i := 0; i < 256; i++ {
a.release(cidrs[i])
}
// allocate the CIDRs again
rcidrs := make([]*net.IPNet, 256)
for i := 0; i < 256; i++ {
rcidrs[i], err = a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %d, %v", i, err)
}
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
if !reflect.DeepEqual(cidrs, rcidrs) {
t.Fatalf("expected re-allocated cidrs are the same collection")
}
}
func TestCIDRSet_AllocationOccupied(t *testing.T) {
_, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/16")
a := newCIDRSet(clusterCIDR, 24)
// allocate all the CIDRs
var err error
cidrs := make([]*net.IPNet, 256)
for i := 0; i < 256; i++ {
cidrs[i], err = a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
// release them all
for i := 0; i < 256; i++ {
a.release(cidrs[i])
}
// occupy the last 128 CIDRs
for i := 128; i < 256; i++ {
a.occupy(cidrs[i])
}
// allocate the first 128 CIDRs again
rcidrs := make([]*net.IPNet, 128)
for i := 0; i < 128; i++ {
rcidrs[i], err = a.allocateNext()
if err != nil {
t.Fatalf("unexpected error: %d, %v", i, err)
}
}
_, err = a.allocateNext()
if err == nil {
t.Fatalf("expected error because of fully-allocated range")
}
// check Occupy() work properly
for i := 128; i < 256; i++ {
rcidrs = append(rcidrs, cidrs[i])
}
if !reflect.DeepEqual(cidrs, rcidrs) {
t.Fatalf("expected re-allocated cidrs are the same collection")
}
}
func TestGetBitforCIDR(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
subNetCIDRStr string
expectedBit int
expectErr bool
}{
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/16",
expectedBit: 0,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.123.0.0/16",
expectedBit: 123,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.168.0.0/16",
expectedBit: 168,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.224.0.0/16",
expectedBit: 224,
expectErr: false,
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "192.168.12.0/24",
expectedBit: 12,
expectErr: false,
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "192.168.151.0/24",
expectedBit: 151,
expectErr: false,
},
{
clusterCIDRStr: "192.168.0.0/16",
subNetMaskSize: 24,
subNetCIDRStr: "127.168.224.0/24",
expectErr: true,
},
}
for _, tc := range cases {
_, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
cs := newCIDRSet(clusterCIDR, tc.subNetMaskSize)
_, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
glog.Errorf("expected error but got null")
continue
}
if err != nil && !tc.expectErr {
glog.Errorf("unexpected error: %v", err)
continue
}
if got != tc.expectedBit {
glog.Errorf("expected %v, but got %v", tc.expectedBit, got)
}
}
}
func TestOccupy(t *testing.T) {
cases := []struct {
clusterCIDRStr string
subNetMaskSize int
subNetCIDRStr string
expectedUsedBegin int
expectedUsedEnd int
expectErr bool
}{
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/8",
expectedUsedBegin: 0,
expectedUsedEnd: 256,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/2",
expectedUsedBegin: 0,
expectedUsedEnd: 256,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/16",
expectedUsedBegin: 0,
expectedUsedEnd: 0,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/8",
subNetMaskSize: 32,
subNetCIDRStr: "127.0.0.0/16",
expectedUsedBegin: 0,
expectedUsedEnd: 65535,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 16,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 256,
expectedUsedEnd: 257,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 15,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 128,
expectedUsedEnd: 128,
expectErr: false,
},
{
clusterCIDRStr: "127.0.0.0/7",
subNetMaskSize: 18,
subNetCIDRStr: "127.0.0.0/15",
expectedUsedBegin: 1024,
expectedUsedEnd: 1031,
expectErr: false,
},
}
for _, tc := range cases {
_, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
cs := newCIDRSet(clusterCIDR, tc.subNetMaskSize)
_, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
err = cs.occupy(subnetCIDR)
if err == nil && tc.expectErr {
t.Errorf("expected error but got none")
continue
}
if err != nil && !tc.expectErr {
t.Errorf("unexpected error: %v", err)
continue
}
expectedUsed := big.Int{}
for i := tc.expectedUsedBegin; i <= tc.expectedUsedEnd; i++ {
expectedUsed.SetBit(&expectedUsed, i, 1)
}
if expectedUsed.Cmp(&cs.used) != 0 {
t.Errorf("error")
}
}
}

View file

@ -0,0 +1,290 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"fmt"
"strings"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util/node"
utilversion "k8s.io/kubernetes/pkg/util/version"
"github.com/golang/glog"
)
const (
// Number of Nodes that needs to be in the cluster for it to be treated as "large"
LargeClusterThreshold = 20
)
// deletePods will delete all pods from master running on given node, and return true
// if any pods were deleted, or were found pending deletion.
func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, nodeName, nodeUID string, daemonStore cache.StoreToDaemonSetLister) (bool, error) {
remaining := false
selector := fields.OneTermEqualSelector(api.PodHostField, nodeName).String()
options := v1.ListOptions{FieldSelector: selector}
pods, err := kubeClient.Core().Pods(v1.NamespaceAll).List(options)
var updateErrList []error
if err != nil {
return remaining, err
}
if len(pods.Items) > 0 {
recordNodeEvent(recorder, nodeName, nodeUID, v1.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
}
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeName {
continue
}
// Set reason and message in the pod object.
if _, err = setPodTerminationReason(kubeClient, &pod, nodeName); err != nil {
if errors.IsConflict(err) {
updateErrList = append(updateErrList,
fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err))
continue
}
}
// if the pod has already been marked for deletion, we still return true that there are remaining pods.
if pod.DeletionGracePeriodSeconds != nil {
remaining = true
continue
}
// if the pod is managed by a daemonset, ignore it
_, err := daemonStore.GetPodDaemonSets(&pod)
if err == nil { // No error means at least one daemonset was found
continue
}
glog.V(2).Infof("Starting deletion of pod %v", pod.Name)
recorder.Eventf(&pod, v1.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
if err := kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
return false, err
}
remaining = true
}
if len(updateErrList) > 0 {
return false, utilerrors.NewAggregate(updateErrList)
}
return remaining, nil
}
// setPodTerminationReason attempts to set a reason and message in the pod status, updates it in the apiserver,
// and returns an error if it encounters one.
func setPodTerminationReason(kubeClient clientset.Interface, pod *v1.Pod, nodeName string) (*v1.Pod, error) {
if pod.Status.Reason == node.NodeUnreachablePodReason {
return pod, nil
}
pod.Status.Reason = node.NodeUnreachablePodReason
pod.Status.Message = fmt.Sprintf(node.NodeUnreachablePodMessage, nodeName, pod.Name)
var updatedPod *v1.Pod
var err error
if updatedPod, err = kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
return nil, err
}
return updatedPod, nil
}
func forcefullyDeletePod(c clientset.Interface, pod *v1.Pod) error {
var zero int64
glog.Infof("NodeController is force deleting Pod: %v:%v", pod.Namespace, pod.Name)
err := c.Core().Pods(pod.Namespace).Delete(pod.Name, &v1.DeleteOptions{GracePeriodSeconds: &zero})
if err == nil {
glog.V(4).Infof("forceful deletion of %s succeeded", pod.Name)
}
return err
}
// forcefullyDeleteNode immediately the node. The pods on the node are cleaned
// up by the podGC.
func forcefullyDeleteNode(kubeClient clientset.Interface, nodeName string) error {
if err := kubeClient.Core().Nodes().Delete(nodeName, nil); err != nil {
return fmt.Errorf("unable to delete node %q: %v", nodeName, err)
}
return nil
}
// maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating
// that should not be gracefully terminated.
func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
pod, ok := obj.(*v1.Pod)
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Couldn't get object from tombstone %#v", obj)
return
}
pod, ok = tombstone.Obj.(*v1.Pod)
if !ok {
glog.Errorf("Tombstone contained object that is not a Pod %#v", obj)
return
}
}
// consider only terminating pods
if pod.DeletionTimestamp == nil {
return
}
nodeObj, found, err := nc.nodeStore.Store.GetByKey(pod.Spec.NodeName)
if err != nil {
// this can only happen if the Store.KeyFunc has a problem creating
// a key for the pod. If it happens once, it will happen again so
// don't bother requeuing the pod.
utilruntime.HandleError(err)
return
}
// if there is no such node, do nothing and let the podGC clean it up.
if !found {
return
}
// delete terminating pods that have been scheduled on
// nodes that do not support graceful termination
// TODO(mikedanese): this can be removed when we no longer
// guarantee backwards compatibility of master API to kubelets with
// versions less than 1.1.0
node := nodeObj.(*v1.Node)
v, err := utilversion.ParseSemantic(node.Status.NodeInfo.KubeletVersion)
if err != nil {
glog.V(0).Infof("Couldn't parse version %q of node: %v", node.Status.NodeInfo.KubeletVersion, err)
utilruntime.HandleError(nc.forcefullyDeletePod(pod))
return
}
if v.LessThan(gracefulDeletionVersion) {
utilruntime.HandleError(nc.forcefullyDeletePod(pod))
return
}
}
// update ready status of all pods running on given node from master
// return true if success
func markAllPodsNotReady(kubeClient clientset.Interface, node *v1.Node) error {
// Don't set pods to NotReady if the kubelet is running a version that
// doesn't understand how to correct readiness.
// TODO: Remove this check when we no longer guarantee backward compatibility
// with node versions < 1.2.0.
if nodeRunningOutdatedKubelet(node) {
return nil
}
nodeName := node.Name
glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
opts := v1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName).String()}
pods, err := kubeClient.Core().Pods(v1.NamespaceAll).List(opts)
if err != nil {
return err
}
errMsg := []string{}
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeName {
continue
}
for i, cond := range pod.Status.Conditions {
if cond.Type == v1.PodReady {
pod.Status.Conditions[i].Status = v1.ConditionFalse
glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
_, err := kubeClient.Core().Pods(pod.Namespace).UpdateStatus(&pod)
if err != nil {
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
errMsg = append(errMsg, fmt.Sprintf("%v", err))
}
break
}
}
}
if len(errMsg) == 0 {
return nil
}
return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}
// nodeRunningOutdatedKubelet returns true if the kubeletVersion reported
// in the nodeInfo of the given node is "outdated", meaning < 1.2.0.
// Older versions were inflexible and modifying pod.Status directly through
// the apiserver would result in unexpected outcomes.
func nodeRunningOutdatedKubelet(node *v1.Node) bool {
v, err := utilversion.ParseSemantic(node.Status.NodeInfo.KubeletVersion)
if err != nil {
glog.Errorf("couldn't parse version %q of node %v", node.Status.NodeInfo.KubeletVersion, err)
return true
}
if v.LessThan(podStatusReconciliationVersion) {
glog.Infof("Node %v running kubelet at (%v) which is less than the minimum version that allows nodecontroller to mark pods NotReady (%v).", node.Name, v, podStatusReconciliationVersion)
return true
}
return false
}
func nodeExistsInCloudProvider(cloud cloudprovider.Interface, nodeName types.NodeName) (bool, error) {
instances, ok := cloud.Instances()
if !ok {
return false, fmt.Errorf("%v", ErrCloudInstance)
}
if _, err := instances.ExternalID(nodeName); err != nil {
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, err
}
return true, nil
}
func recordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: nodeName,
UID: types.UID(nodeUID),
Namespace: "",
}
glog.V(2).Infof("Recording %s event message for node %s", event, nodeName)
recorder.Eventf(ref, eventtype, reason, "Node %s event: %s", nodeName, event)
}
func recordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, new_status string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: node.UID,
Namespace: "",
}
glog.V(2).Infof("Recording status change %s event message for node %s", new_status, node.Name)
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
recorder.Eventf(ref, v1.EventTypeNormal, new_status, "Node %s status is now: %s", node.Name, new_status)
}

19
vendor/k8s.io/kubernetes/pkg/controller/node/doc.go generated vendored Normal file
View file

@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package node contains code for syncing cloud instances with
// node registry
package node // import "k8s.io/kubernetes/pkg/controller/node"

View file

@ -0,0 +1,77 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
const (
NodeControllerSubsystem = "node_collector"
ZoneHealthStatisticKey = "zone_health"
ZoneSizeKey = "zone_size"
ZoneNoUnhealthyNodesKey = "unhealty_nodes_in_zone"
EvictionsNumberKey = "evictions_number"
)
var (
ZoneHealth = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: NodeControllerSubsystem,
Name: ZoneHealthStatisticKey,
Help: "Gauge measuring percentage of healty nodes per zone.",
},
[]string{"zone"},
)
ZoneSize = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: NodeControllerSubsystem,
Name: ZoneSizeKey,
Help: "Gauge measuring number of registered Nodes per zones.",
},
[]string{"zone"},
)
UnhealthyNodes = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: NodeControllerSubsystem,
Name: ZoneNoUnhealthyNodesKey,
Help: "Gauge measuring number of not Ready Nodes per zones.",
},
[]string{"zone"},
)
EvictionsNumber = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: NodeControllerSubsystem,
Name: EvictionsNumberKey,
Help: "Number of Node evictions that happened since current instance of NodeController started.",
},
[]string{"zone"},
)
)
var registerMetrics sync.Once
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(ZoneHealth)
prometheus.MustRegister(ZoneSize)
prometheus.MustRegister(UnhealthyNodes)
prometheus.MustRegister(EvictionsNumber)
})
}

View file

@ -0,0 +1,878 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"errors"
"fmt"
"net"
"sync"
"time"
"github.com/golang/glog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
v1core "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/core/v1"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller/informers"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/util/flowcontrol"
"k8s.io/kubernetes/pkg/util/metrics"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/system"
utilversion "k8s.io/kubernetes/pkg/util/version"
)
func init() {
// Register prometheus metrics
Register()
}
var (
ErrCloudInstance = errors.New("cloud provider doesn't support instances.")
gracefulDeletionVersion = utilversion.MustParseSemantic("v1.1.0")
// The minimum kubelet version for which the nodecontroller
// can safely flip pod.Status to NotReady.
podStatusReconciliationVersion = utilversion.MustParseSemantic("v1.2.0")
)
const (
// nodeStatusUpdateRetry controls the number of retries of writing NodeStatus update.
nodeStatusUpdateRetry = 5
// controls how often NodeController will try to evict Pods from non-responsive Nodes.
nodeEvictionPeriod = 100 * time.Millisecond
// Burst value for all eviction rate limiters
evictionRateLimiterBurst = 1
// The amount of time the nodecontroller polls on the list nodes endpoint.
apiserverStartupGracePeriod = 10 * time.Minute
// The amount of time the nodecontroller should sleep between retrying NodeStatus updates
retrySleepTime = 20 * time.Millisecond
)
type zoneState string
const (
stateInitial = zoneState("Initial")
stateNormal = zoneState("Normal")
stateFullDisruption = zoneState("FullDisruption")
statePartialDisruption = zoneState("PartialDisruption")
)
type nodeStatusData struct {
probeTimestamp metav1.Time
readyTransitionTimestamp metav1.Time
status v1.NodeStatus
}
type NodeController struct {
allocateNodeCIDRs bool
cloud cloudprovider.Interface
clusterCIDR *net.IPNet
serviceCIDR *net.IPNet
knownNodeSet map[string]*v1.Node
kubeClient clientset.Interface
// Method for easy mocking in unittest.
lookupIP func(host string) ([]net.IP, error)
// Value used if sync_nodes_status=False. NodeController will not proactively
// sync node status in this case, but will monitor node status updated from kubelet. If
// it doesn't receive update for this amount of time, it will start posting "NodeReady==
// ConditionUnknown". The amount of time before which NodeController start evicting pods
// is controlled via flag 'pod-eviction-timeout'.
// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
// in kubelet. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
// N means number of retries allowed for kubelet to post node status. It is pointless
// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
// The constant must be less than podEvictionTimeout.
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes
// longer for user to see up-to-date node status.
nodeMonitorGracePeriod time.Duration
// Value controlling NodeController monitoring period, i.e. how often does NodeController
// check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
// TODO: Change node status monitor to watch based.
nodeMonitorPeriod time.Duration
// Value used if sync_nodes_status=False, only for node startup. When node
// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
nodeStartupGracePeriod time.Duration
// per Node map storing last observed Status together with a local time when it was observed.
// This timestamp is to be used instead of LastProbeTime stored in Condition. We do this
// to aviod the problem with time skew across the cluster.
nodeStatusMap map[string]nodeStatusData
now func() metav1.Time
// Lock to access evictor workers
evictorLock sync.Mutex
// workers that evicts pods from unresponsive nodes.
zonePodEvictor map[string]*RateLimitedTimedQueue
podEvictionTimeout time.Duration
// The maximum duration before a pod evicted from a node can be forcefully terminated.
maximumGracePeriod time.Duration
recorder record.EventRecorder
podInformer informers.PodInformer
nodeInformer informers.NodeInformer
daemonSetInformer informers.DaemonSetInformer
podStore cache.StoreToPodLister
nodeStore cache.StoreToNodeLister
daemonSetStore cache.StoreToDaemonSetLister
// allocate/recycle CIDRs for node if allocateNodeCIDRs == true
cidrAllocator CIDRAllocator
forcefullyDeletePod func(*v1.Pod) error
nodeExistsInCloudProvider func(types.NodeName) (bool, error)
computeZoneStateFunc func(nodeConditions []*v1.NodeCondition) (int, zoneState)
enterPartialDisruptionFunc func(nodeNum int) float32
enterFullDisruptionFunc func(nodeNum int) float32
zoneStates map[string]zoneState
evictionLimiterQPS float32
secondaryEvictionLimiterQPS float32
largeClusterThreshold int32
unhealthyZoneThreshold float32
// internalPodInformer is used to hold a personal informer. If we're using
// a normal shared informer, then the informer will be started for us. If
// we have a personal informer, we must start it ourselves. If you start
// the controller using NewDaemonSetsController(passing SharedInformer), this
// will be null
internalPodInformer cache.SharedIndexInformer
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
// This method returns an error if it is unable to initialize the CIDR bitmap with
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error.
func NewNodeController(
podInformer informers.PodInformer,
nodeInformer informers.NodeInformer,
daemonSetInformer informers.DaemonSetInformer,
cloud cloudprovider.Interface,
kubeClient clientset.Interface,
podEvictionTimeout time.Duration,
evictionLimiterQPS float32,
secondaryEvictionLimiterQPS float32,
largeClusterThreshold int32,
unhealthyZoneThreshold float32,
nodeMonitorGracePeriod time.Duration,
nodeStartupGracePeriod time.Duration,
nodeMonitorPeriod time.Duration,
clusterCIDR *net.IPNet,
serviceCIDR *net.IPNet,
nodeCIDRMaskSize int,
allocateNodeCIDRs bool) (*NodeController, error) {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(v1.EventSource{Component: "controllermanager"})
eventBroadcaster.StartLogging(glog.Infof)
if kubeClient != nil {
glog.V(0).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.Core().Events("")})
} else {
glog.V(0).Infof("No api server defined - no events will be sent to API server.")
}
if kubeClient != nil && kubeClient.Core().RESTClient().GetRateLimiter() != nil {
metrics.RegisterMetricAndTrackRateLimiterUsage("node_controller", kubeClient.Core().RESTClient().GetRateLimiter())
}
if allocateNodeCIDRs {
if clusterCIDR == nil {
glog.Fatal("NodeController: Must specify clusterCIDR if allocateNodeCIDRs == true.")
}
mask := clusterCIDR.Mask
if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSize {
glog.Fatal("NodeController: Invalid clusterCIDR, mask size of clusterCIDR must be less than nodeCIDRMaskSize.")
}
}
nc := &NodeController{
cloud: cloud,
knownNodeSet: make(map[string]*v1.Node),
kubeClient: kubeClient,
recorder: recorder,
podEvictionTimeout: podEvictionTimeout,
maximumGracePeriod: 5 * time.Minute,
zonePodEvictor: make(map[string]*RateLimitedTimedQueue),
nodeStatusMap: make(map[string]nodeStatusData),
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
nodeMonitorPeriod: nodeMonitorPeriod,
nodeStartupGracePeriod: nodeStartupGracePeriod,
lookupIP: net.LookupIP,
now: metav1.Now,
clusterCIDR: clusterCIDR,
serviceCIDR: serviceCIDR,
allocateNodeCIDRs: allocateNodeCIDRs,
forcefullyDeletePod: func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) },
nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
evictionLimiterQPS: evictionLimiterQPS,
secondaryEvictionLimiterQPS: secondaryEvictionLimiterQPS,
largeClusterThreshold: largeClusterThreshold,
unhealthyZoneThreshold: unhealthyZoneThreshold,
zoneStates: make(map[string]zoneState),
podInformer: podInformer,
nodeInformer: nodeInformer,
daemonSetInformer: daemonSetInformer,
}
nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
nc.computeZoneStateFunc = nc.ComputeZoneState
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: nc.maybeDeleteTerminatingPod,
UpdateFunc: func(_, obj interface{}) { nc.maybeDeleteTerminatingPod(obj) },
})
nc.podStore = *podInformer.Lister()
nodeEventHandlerFuncs := cache.ResourceEventHandlerFuncs{}
if nc.allocateNodeCIDRs {
var nodeList *v1.NodeList
var err error
// We must poll because apiserver might not be up. This error causes
// controller manager to restart.
if pollErr := wait.Poll(10*time.Second, apiserverStartupGracePeriod, func() (bool, error) {
nodeList, err = kubeClient.Core().Nodes().List(v1.ListOptions{
FieldSelector: fields.Everything().String(),
LabelSelector: labels.Everything().String(),
})
if err != nil {
glog.Errorf("Failed to list all nodes: %v", err)
return false, nil
}
return true, nil
}); pollErr != nil {
return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod)
}
nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
if err != nil {
return nil, err
}
nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{
AddFunc: func(originalObj interface{}) {
obj, err := api.Scheme.DeepCopy(originalObj)
if err != nil {
utilruntime.HandleError(err)
return
}
node := obj.(*v1.Node)
if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil {
utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
}
},
UpdateFunc: func(_, obj interface{}) {
node := obj.(*v1.Node)
// If the PodCIDR is not empty we either:
// - already processed a Node that already had a CIDR after NC restarted
// (cidr is marked as used),
// - already processed a Node successfully and allocated a CIDR for it
// (cidr is marked as used),
// - already processed a Node but we did saw a "timeout" response and
// request eventually got through in this case we haven't released
// the allocated CIDR (cidr is still marked as used).
// There's a possible error here:
// - NC sees a new Node and assigns a CIDR X to it,
// - Update Node call fails with a timeout,
// - Node is updated by some other component, NC sees an update and
// assigns CIDR Y to the Node,
// - Both CIDR X and CIDR Y are marked as used in the local cache,
// even though Node sees only CIDR Y
// The problem here is that in in-memory cache we see CIDR X as marked,
// which prevents it from being assigned to any new node. The cluster
// state is correct.
// Restart of NC fixes the issue.
if node.Spec.PodCIDR == "" {
nodeCopy, err := api.Scheme.Copy(node)
if err != nil {
utilruntime.HandleError(err)
return
}
if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil {
utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
}
}
},
DeleteFunc: func(originalObj interface{}) {
obj, err := api.Scheme.DeepCopy(originalObj)
if err != nil {
utilruntime.HandleError(err)
return
}
node, isNode := obj.(*v1.Node)
// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692
if !isNode {
deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Received unexpected object: %v", obj)
return
}
node, ok = deletedState.Obj.(*v1.Node)
if !ok {
glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
return
}
}
if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil {
glog.Errorf("Error releasing CIDR: %v", err)
}
},
}
}
nodeInformer.Informer().AddEventHandler(nodeEventHandlerFuncs)
nc.nodeStore = *nodeInformer.Lister()
nc.daemonSetStore = *daemonSetInformer.Lister()
return nc, nil
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run() {
go func() {
defer utilruntime.HandleCrash()
if !cache.WaitForCacheSync(wait.NeverStop, nc.nodeInformer.Informer().HasSynced, nc.podInformer.Informer().HasSynced, nc.daemonSetInformer.Informer().HasSynced) {
utilruntime.HandleError(errors.New("NodeController timed out while waiting for informers to sync..."))
return
}
// Incorporate the results of node status pushed from kubelet to master.
go wait.Until(func() {
if err := nc.monitorNodeStatus(); err != nil {
glog.Errorf("Error monitoring node status: %v", err)
}
}, nc.nodeMonitorPeriod, wait.NeverStop)
// Managing eviction of nodes:
// When we delete pods off a node, if the node was not empty at the time we then
// queue an eviction watcher. If we hit an error, retry deletion.
go wait.Until(func() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zonePodEvictor {
nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
obj, exists, err := nc.nodeStore.GetByKey(value.Value)
if err != nil {
glog.Warningf("Failed to get Node %v from the nodeStore: %v", value.Value, err)
} else if !exists {
glog.Warningf("Node %v no longer present in nodeStore!", value.Value)
} else {
node, _ := obj.(*v1.Node)
zone := utilnode.GetZoneKey(node)
EvictionsNumber.WithLabelValues(zone).Inc()
}
nodeUid, _ := value.UID.(string)
remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nodeUid, nc.daemonSetStore)
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
return false, 0
}
if remaining {
glog.Infof("Pods awaiting deletion due to NodeController eviction")
}
return true, 0
})
}
}, nodeEvictionPeriod, wait.NeverStop)
}()
}
// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
nodes, err := nc.nodeStore.List()
if err != nil {
return err
}
added, deleted := nc.checkForNodeAddedDeleted(&nodes)
for i := range added {
glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
recordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
// When adding new Nodes we need to check if new zone appeared, and if so add new evictor.
zone := utilnode.GetZoneKey(added[i])
if _, found := nc.zonePodEvictor[zone]; !found {
nc.zonePodEvictor[zone] =
NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
// Init the metric for the new zone.
glog.Infof("Initializing eviction metric for zone: %v", zone)
EvictionsNumber.WithLabelValues(zone).Add(0)
}
nc.cancelPodEviction(added[i])
}
for i := range deleted {
glog.V(1).Infof("NodeController observed a Node deletion: %v", deleted[i].Name)
recordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", deleted[i].Name))
delete(nc.knownNodeSet, deleted[i].Name)
}
zoneToNodeConditions := map[string][]*v1.NodeCondition{}
for i := range nodes.Items {
var gracePeriod time.Duration
var observedReadyCondition v1.NodeCondition
var currentReadyCondition *v1.NodeCondition
nodeCopy, err := api.Scheme.DeepCopy(&nodes.Items[i])
if err != nil {
utilruntime.HandleError(err)
continue
}
node := nodeCopy.(*v1.Node)
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*nodeStatusUpdateRetry, func() (bool, error) {
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
if err == nil {
return true, nil
}
name := node.Name
node, err = nc.kubeClient.Core().Nodes().Get(name, metav1.GetOptions{})
if err != nil {
glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
return false, err
}
return false, nil
}); err != nil {
glog.Errorf("Update status of Node %v from NodeController error : %v. "+
"Skipping - no pods will be evicted.", node.Name, err)
continue
}
// We do not treat a master node as a part of the cluster for network disruption checking.
if !system.IsMasterNode(node.Name) {
zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
}
decisionTimestamp := nc.now()
if currentReadyCondition != nil {
// Check eviction timeout against decisionTimestamp
if observedReadyCondition.Status == v1.ConditionFalse &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node) {
glog.V(2).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
}
}
if observedReadyCondition.Status == v1.ConditionUnknown &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node) {
glog.V(2).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
}
}
if observedReadyCondition.Status == v1.ConditionTrue {
if nc.cancelPodEviction(node) {
glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
}
}
// Report node event.
if currentReadyCondition.Status != v1.ConditionTrue && observedReadyCondition.Status == v1.ConditionTrue {
recordNodeStatusChange(nc.recorder, node, "NodeNotReady")
if err = markAllPodsNotReady(nc.kubeClient, node); err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
}
}
// Check with the cloud provider to see if the node still exists. If it
// doesn't, delete the node immediately.
if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
if err != nil {
glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
continue
}
if !exists {
glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
recordNodeEvent(nc.recorder, node.Name, string(node.UID), v1.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
go func(nodeName string) {
defer utilruntime.HandleCrash()
// Kubelet is not reporting and Cloud Provider says node
// is gone. Delete it without worrying about grace
// periods.
if err := forcefullyDeleteNode(nc.kubeClient, nodeName); err != nil {
glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
}
}(node.Name)
}
}
}
}
nc.handleDisruption(zoneToNodeConditions, &nodes)
return nil
}
func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*v1.NodeCondition, nodes *v1.NodeList) {
newZoneStates := map[string]zoneState{}
allAreFullyDisrupted := true
for k, v := range zoneToNodeConditions {
ZoneSize.WithLabelValues(k).Set(float64(len(v)))
unhealthy, newState := nc.computeZoneStateFunc(v)
ZoneHealth.WithLabelValues(k).Set(float64(100*(len(v)-unhealthy)) / float64(len(v)))
UnhealthyNodes.WithLabelValues(k).Set(float64(unhealthy))
if newState != stateFullDisruption {
allAreFullyDisrupted = false
}
newZoneStates[k] = newState
if _, had := nc.zoneStates[k]; !had {
nc.zoneStates[k] = stateInitial
}
}
allWasFullyDisrupted := true
for k, v := range nc.zoneStates {
if _, have := zoneToNodeConditions[k]; !have {
ZoneSize.WithLabelValues(k).Set(0)
ZoneHealth.WithLabelValues(k).Set(100)
UnhealthyNodes.WithLabelValues(k).Set(0)
delete(nc.zoneStates, k)
continue
}
if v != stateFullDisruption {
allWasFullyDisrupted = false
break
}
}
// At least one node was responding in previous pass or in the current pass. Semantics is as follows:
// - if the new state is "partialDisruption" we call a user defined function that returns a new limiter to use,
// - if the new state is "normal" we resume normal operation (go back to default limiter settings),
// - if new state is "fullDisruption" we restore normal eviction rate,
// - unless all zones in the cluster are in "fullDisruption" - in that case we stop all evictions.
if !allAreFullyDisrupted || !allWasFullyDisrupted {
// We're switching to full disruption mode
if allAreFullyDisrupted {
glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes.Items {
nc.cancelPodEviction(&nodes.Items[i])
}
// We stop all evictions.
for k := range nc.zonePodEvictor {
nc.zonePodEvictor[k].SwapLimiter(0)
}
for k := range nc.zoneStates {
nc.zoneStates[k] = stateFullDisruption
}
// All rate limiters are updated, so we can return early here.
return
}
// We're exiting full disruption mode
if allWasFullyDisrupted {
glog.V(0).Info("NodeController detected that some Nodes are Ready. Exiting master disruption mode.")
// When exiting disruption mode update probe timestamps on all Nodes.
now := nc.now()
for i := range nodes.Items {
v := nc.nodeStatusMap[nodes.Items[i].Name]
v.probeTimestamp = now
v.readyTransitionTimestamp = now
nc.nodeStatusMap[nodes.Items[i].Name] = v
}
// We reset all rate limiters to settings appropriate for the given state.
for k := range nc.zonePodEvictor {
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newZoneStates[k])
nc.zoneStates[k] = newZoneStates[k]
}
return
}
// We know that there's at least one not-fully disrupted so,
// we can use default behavior for rate limiters
for k, v := range nc.zoneStates {
newState := newZoneStates[k]
if v == newState {
continue
}
glog.V(0).Infof("NodeController detected that zone %v is now in state %v.", k, newState)
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
nc.zoneStates[k] = newState
}
}
}
func (nc *NodeController) setLimiterInZone(zone string, zoneSize int, state zoneState) {
switch state {
case stateNormal:
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
case statePartialDisruption:
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
case stateFullDisruption:
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
}
}
// For a given node checks its conditions and tries to update it. Returns grace period to which given node
// is entitled, state of current and last observed Ready Condition, and an error if it occurred.
func (nc *NodeController) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
var err error
var gracePeriod time.Duration
var observedReadyCondition v1.NodeCondition
_, currentReadyCondition := v1.GetNodeCondition(&node.Status, v1.NodeReady)
if currentReadyCondition == nil {
// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
// A fake ready condition is created, where LastProbeTime and LastTransitionTime is set
// to node.CreationTimestamp to avoid handle the corner case.
observedReadyCondition = v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: node.CreationTimestamp,
}
gracePeriod = nc.nodeStartupGracePeriod
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: node.CreationTimestamp,
readyTransitionTimestamp: node.CreationTimestamp,
}
} else {
// If ready condition is not nil, make a copy of it, since we may modify it in place later.
observedReadyCondition = *currentReadyCondition
gracePeriod = nc.nodeMonitorGracePeriod
}
savedNodeStatus, found := nc.nodeStatusMap[node.Name]
// There are following cases to check:
// - both saved and new status have no Ready Condition set - we leave everything as it is,
// - saved status have no Ready Condition, but current one does - NodeController was restarted with Node data already present in etcd,
// - saved status have some Ready Condition, but current one does not - it's an error, but we fill it up because that's probably a good thing to do,
// - both saved and current statuses have Ready Conditions and they have the same LastProbeTime - nothing happened on that Node, it may be
// unresponsive, so we leave it as it is,
// - both saved and current statuses have Ready Conditions, they have different LastProbeTimes, but the same Ready Condition State -
// everything's in order, no transition occurred, we update only probeTimestamp,
// - both saved and current statuses have Ready Conditions, different LastProbeTimes and different Ready Condition State -
// Ready Condition changed it state since we last seen it, so we update both probeTimestamp and readyTransitionTimestamp.
// TODO: things to consider:
// - if 'LastProbeTime' have gone back in time its probably an error, currently we ignore it,
// - currently only correct Ready State transition outside of Node Controller is marking it ready by Kubelet, we don't check
// if that's the case, but it does not seem necessary.
var savedCondition *v1.NodeCondition
if found {
_, savedCondition = v1.GetNodeCondition(&savedNodeStatus.status, v1.NodeReady)
}
_, observedCondition := v1.GetNodeCondition(&node.Status, v1.NodeReady)
if !found {
glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition == nil && observedCondition != nil {
glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && observedCondition == nil {
glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
// TODO: figure out what to do in this case. For now we do the same thing as above.
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
var transitionTime metav1.Time
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is.
if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition.Status, observedCondition)
transitionTime = nc.now()
} else {
transitionTime = savedNodeStatus.readyTransitionTimestamp
}
if glog.V(5) {
glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
} else {
glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
}
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: transitionTime,
}
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
// (regardless of its current value) in the master.
if currentReadyCondition == nil {
glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
if observedReadyCondition.Status != v1.ConditionUnknown {
currentReadyCondition.Status = v1.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown"
currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
// LastProbeTime is the last time we heard from kubelet.
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
currentReadyCondition.LastTransitionTime = nc.now()
}
}
// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
// it to Unknown (regardless of its current value) in the master.
// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
_, oodCondition := v1.GetNodeCondition(&node.Status, v1.NodeOutOfDisk)
if oodCondition == nil {
glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition)
if oodCondition.Status != v1.ConditionUnknown {
oodCondition.Status = v1.ConditionUnknown
oodCondition.Reason = "NodeStatusUnknown"
oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
oodCondition.LastTransitionTime = nc.now()
}
}
_, currentCondition := v1.GetNodeCondition(&node.Status, v1.NodeReady)
if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
glog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
} else {
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
readyTransitionTimestamp: nc.now(),
}
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
}
}
}
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
func (nc *NodeController) checkForNodeAddedDeleted(nodes *v1.NodeList) (added, deleted []*v1.Node) {
for i := range nodes.Items {
if _, has := nc.knownNodeSet[nodes.Items[i].Name]; !has {
added = append(added, &nodes.Items[i])
}
}
// If there's a difference between lengths of known Nodes and observed nodes
// we must have removed some Node.
if len(nc.knownNodeSet)+len(added) != len(nodes.Items) {
knowSetCopy := map[string]*v1.Node{}
for k, v := range nc.knownNodeSet {
knowSetCopy[k] = v
}
for i := range nodes.Items {
delete(knowSetCopy, nodes.Items[i].Name)
}
for i := range knowSetCopy {
deleted = append(deleted, knowSetCopy[i])
}
}
return
}
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued.
func (nc *NodeController) cancelPodEviction(node *v1.Node) bool {
zone := utilnode.GetZoneKey(node)
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
if wasDeleting {
glog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
return true
}
return false
}
// evictPods queues an eviction for the provided node name, and returns false if the node is already
// queued for eviction.
func (nc *NodeController) evictPods(node *v1.Node) bool {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
}
// Default value for cluster eviction rate - we take nodeNum for consistency with ReducedQPSFunc.
func (nc *NodeController) HealthyQPSFunc(nodeNum int) float32 {
return nc.evictionLimiterQPS
}
// If the cluster is large make evictions slower, if they're small stop evictions altogether.
func (nc *NodeController) ReducedQPSFunc(nodeNum int) float32 {
if int32(nodeNum) > nc.largeClusterThreshold {
return nc.secondaryEvictionLimiterQPS
}
return 0
}
// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
// The zone is considered:
// - fullyDisrupted if there're no Ready Nodes,
// - partiallyDisrupted if at least than nc.unhealthyZoneThreshold percent of Nodes are not Ready,
// - normal otherwise
func (nc *NodeController) ComputeZoneState(nodeReadyConditions []*v1.NodeCondition) (int, zoneState) {
readyNodes := 0
notReadyNodes := 0
for i := range nodeReadyConditions {
if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == v1.ConditionTrue {
readyNodes++
} else {
notReadyNodes++
}
}
switch {
case readyNodes == 0 && notReadyNodes > 0:
return notReadyNodes, stateFullDisruption
case notReadyNodes > 2 && float32(notReadyNodes)/float32(notReadyNodes+readyNodes) >= nc.unhealthyZoneThreshold:
return notReadyNodes, statePartialDisruption
default:
return notReadyNodes, stateNormal
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,282 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"container/heap"
"sync"
"time"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/flowcontrol"
"github.com/golang/glog"
)
// TimedValue is a value that should be processed at a designated time.
type TimedValue struct {
Value string
// UID could be anything that helps identify the value
UID interface{}
AddedAt time.Time
ProcessAt time.Time
}
// now is used to test time
var now func() time.Time = time.Now
// TimedQueue is a priority heap where the lowest ProcessAt is at the front of the queue
type TimedQueue []*TimedValue
func (h TimedQueue) Len() int { return len(h) }
func (h TimedQueue) Less(i, j int) bool { return h[i].ProcessAt.Before(h[j].ProcessAt) }
func (h TimedQueue) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *TimedQueue) Push(x interface{}) {
*h = append(*h, x.(*TimedValue))
}
func (h *TimedQueue) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
// A FIFO queue which additionally guarantees that any element can be added only once until
// it is removed.
type UniqueQueue struct {
lock sync.Mutex
queue TimedQueue
set sets.String
}
// Adds a new value to the queue if it wasn't added before, or was explicitly removed by the
// Remove call. Returns true if new value was added.
func (q *UniqueQueue) Add(value TimedValue) bool {
q.lock.Lock()
defer q.lock.Unlock()
if q.set.Has(value.Value) {
return false
}
heap.Push(&q.queue, &value)
q.set.Insert(value.Value)
return true
}
// Replace replaces an existing value in the queue if it already exists, otherwise it does nothing.
// Returns true if the item was found.
func (q *UniqueQueue) Replace(value TimedValue) bool {
q.lock.Lock()
defer q.lock.Unlock()
for i := range q.queue {
if q.queue[i].Value != value.Value {
continue
}
heap.Remove(&q.queue, i)
heap.Push(&q.queue, &value)
return true
}
return false
}
// Removes the value from the queue, but keeps it in the set, so it won't be added second time.
// Returns true if something was removed.
func (q *UniqueQueue) RemoveFromQueue(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
return false
}
for i, val := range q.queue {
if val.Value == value {
heap.Remove(&q.queue, i)
return true
}
}
return false
}
// Removes the value from the queue, so Get() call won't return it, and allow subsequent addition
// of the given value. If the value is not present does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
return false
}
q.set.Delete(value)
for i, val := range q.queue {
if val.Value == value {
heap.Remove(&q.queue, i)
return true
}
}
return true
}
// Returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (TimedValue, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return TimedValue{}, false
}
result := heap.Pop(&q.queue).(*TimedValue)
q.set.Delete(result.Value)
return *result, true
}
// Head returns the oldest added value that wasn't returned yet without removing it.
func (q *UniqueQueue) Head() (TimedValue, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return TimedValue{}, false
}
result := q.queue[0]
return *result, true
}
// Clear removes all items from the queue and duplication preventing set.
func (q *UniqueQueue) Clear() {
q.lock.Lock()
defer q.lock.Unlock()
if q.queue.Len() > 0 {
q.queue = make(TimedQueue, 0)
}
if len(q.set) > 0 {
q.set = sets.NewString()
}
}
// RateLimitedTimedQueue is a unique item priority queue ordered by the expected next time
// of execution. It is also rate limited.
type RateLimitedTimedQueue struct {
queue UniqueQueue
limiterLock sync.Mutex
limiter flowcontrol.RateLimiter
}
// Creates new queue which will use given RateLimiter to oversee execution.
func NewRateLimitedTimedQueue(limiter flowcontrol.RateLimiter) *RateLimitedTimedQueue {
return &RateLimitedTimedQueue{
queue: UniqueQueue{
queue: TimedQueue{},
set: sets.NewString(),
},
limiter: limiter,
}
}
// ActionFunc takes a timed value and returns false if the item must be retried, with an optional
// time.Duration if some minimum wait interval should be used.
type ActionFunc func(TimedValue) (bool, time.Duration)
// Try processes the queue. Ends prematurely if RateLimiter forbids an action and leak is true.
// Otherwise, requeues the item to be processed. Each value is processed once if fn returns true,
// otherwise it is added back to the queue. The returned remaining is used to identify the minimum
// time to execute the next item in the queue. The same value is processed only once unless
// Remove is explicitly called on it (it's done by the cancelPodEviction function in NodeController
// when Node becomes Ready again)
// TODO: figure out a good way to do garbage collection for all Nodes that were removed from
// the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
val, ok := q.queue.Head()
q.limiterLock.Lock()
defer q.limiterLock.Unlock()
for ok {
// rate limit the queue checking
if !q.limiter.TryAccept() {
glog.V(10).Infof("Try rate limited for value: %v", val)
// Try again later
break
}
now := now()
if now.Before(val.ProcessAt) {
break
}
if ok, wait := fn(val); !ok {
val.ProcessAt = now.Add(wait + 1)
q.queue.Replace(val)
} else {
q.queue.RemoveFromQueue(val.Value)
}
val, ok = q.queue.Head()
}
}
// Adds value to the queue to be processed. Won't add the same value(comparsion by value) a second time
// if it was already added and not removed.
func (q *RateLimitedTimedQueue) Add(value string, uid interface{}) bool {
now := now()
return q.queue.Add(TimedValue{
Value: value,
UID: uid,
AddedAt: now,
ProcessAt: now,
})
}
// Removes Node from the Evictor. The Node won't be processed until added again.
func (q *RateLimitedTimedQueue) Remove(value string) bool {
return q.queue.Remove(value)
}
// Removes all items from the queue
func (q *RateLimitedTimedQueue) Clear() {
q.queue.Clear()
}
// SwapLimiter safely swaps current limiter for this queue with the passed one if capacities or qps's differ.
func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) {
q.limiterLock.Lock()
defer q.limiterLock.Unlock()
if q.limiter.QPS() == newQPS {
return
}
var newLimiter flowcontrol.RateLimiter
if newQPS <= 0 {
newLimiter = flowcontrol.NewFakeNeverRateLimiter()
} else {
newLimiter = flowcontrol.NewTokenBucketRateLimiter(newQPS, evictionRateLimiterBurst)
}
// If we're currently waiting on limiter, we drain the new one - this is a good approach when Burst value is 1
// TODO: figure out if we need to support higher Burst values and decide on the drain logic, should we keep:
// - saturation (percentage of used tokens)
// - number of used tokens
// - number of available tokens
// - something else
for q.limiter.Saturation() > newLimiter.Saturation() {
// Check if we're not using fake limiter
previousSaturation := newLimiter.Saturation()
newLimiter.TryAccept()
// It's a fake limiter
if newLimiter.Saturation() == previousSaturation {
break
}
}
q.limiter.Stop()
q.limiter = newLimiter
}

View file

@ -0,0 +1,334 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"reflect"
"testing"
"time"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/flowcontrol"
)
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
for i := 0; i < len(lhs); i++ {
if rhs[i].Value != lhs[i] {
return false
}
}
return true
}
func CheckSetEq(lhs, rhs sets.String) bool {
return lhs.HasAll(rhs.List()...) && rhs.HasAll(lhs.List()...)
}
func TestAddNode(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
queuePattern := []string{"first", "second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := sets.NewString("first", "second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestDelNode(t *testing.T) {
defer func() { now = time.Now }()
var tick int64
now = func() time.Time {
t := time.Unix(tick, 0)
tick++
return t
}
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("first")
queuePattern := []string{"second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := sets.NewString("second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
queuePattern = []string{"first", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = sets.NewString("first", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("third")
queuePattern = []string{"first", "second"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = sets.NewString("first", "second")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
setPattern := sets.NewString("first", "third")
if len(deletedMap) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, deletedMap) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
}
func TestTryOrdering(t *testing.T) {
defer func() { now = time.Now }()
current := time.Unix(0, 0)
delay := 0
// the current time is incremented by 1ms every time now is invoked
now = func() time.Time {
if delay > 0 {
delay--
} else {
current = current.Add(time.Millisecond)
}
t.Logf("time %d", current.UnixNano())
return current
}
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
order := []string{}
count := 0
hasQueued := false
evictor.Try(func(value TimedValue) (bool, time.Duration) {
count++
t.Logf("eviction %d", count)
if value.ProcessAt.IsZero() {
t.Fatalf("processAt should not be zero")
}
switch value.Value {
case "first":
if !value.AddedAt.Equal(time.Unix(0, time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
}
case "second":
if !value.AddedAt.Equal(time.Unix(0, 2*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
}
if hasQueued {
if !value.ProcessAt.Equal(time.Unix(0, 6*time.Millisecond.Nanoseconds())) {
t.Fatalf("process time for %s is %d", value.Value, value.ProcessAt)
}
break
}
hasQueued = true
delay = 1
t.Logf("going to delay")
return false, 2 * time.Millisecond
case "third":
if !value.AddedAt.Equal(time.Unix(0, 3*time.Millisecond.Nanoseconds())) {
t.Fatalf("added time for %s is %d", value.Value, value.AddedAt)
}
}
order = append(order, value.Value)
return true, 0
})
if !reflect.DeepEqual(order, []string{"first", "third"}) {
t.Fatalf("order was wrong: %v", order)
}
if count != 3 {
t.Fatalf("unexpected iterations: %d", count)
}
}
func TestTryRemovingWhileTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
processing := make(chan struct{})
wait := make(chan struct{})
order := []string{}
count := 0
queued := false
// while the Try function is processing "second", remove it from the queue
// we should not see "second" retried.
go func() {
<-processing
evictor.Remove("second")
close(wait)
}()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
count++
if value.AddedAt.IsZero() {
t.Fatalf("added should not be zero")
}
if value.ProcessAt.IsZero() {
t.Fatalf("next should not be zero")
}
if !queued && value.Value == "second" {
queued = true
close(processing)
<-wait
return false, time.Millisecond
}
order = append(order, value.Value)
return true, 0
})
if !reflect.DeepEqual(order, []string{"first", "third"}) {
t.Fatalf("order was wrong: %v", order)
}
if count != 3 {
t.Fatalf("unexpected iterations: %d", count)
}
}
func TestClear(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Clear()
if len(evictor.queue.queue) != 0 {
t.Fatalf("Clear should remove all elements from the queue.")
}
}
func TestSwapLimiter(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
fakeAlways := flowcontrol.NewFakeAlwaysRateLimiter()
qps := evictor.limiter.QPS()
if qps != fakeAlways.QPS() {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, fakeAlways.QPS())
}
evictor.SwapLimiter(0)
qps = evictor.limiter.QPS()
fakeNever := flowcontrol.NewFakeNeverRateLimiter()
if qps != fakeNever.QPS() {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, fakeNever.QPS())
}
createdQPS := float32(5.5)
evictor.SwapLimiter(createdQPS)
qps = evictor.limiter.QPS()
if qps != createdQPS {
t.Fatalf("QPS does not match create one: %v instead of %v", qps, createdQPS)
}
}
func TestAddAfterTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first", "11111")
evictor.Add("second", "22222")
evictor.Add("third", "33333")
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
setPattern := sets.NewString("first", "third")
if len(deletedMap) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, deletedMap) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
evictor.Add("first", "11111")
evictor.Try(func(value TimedValue) (bool, time.Duration) {
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
return true, 0
})
}

View file

@ -0,0 +1,41 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["test_utils.go"],
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/client/clientset_generated/clientset/fake:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/core/v1:go_default_library",
"//pkg/util/clock:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor:k8s.io/apimachinery/pkg/api/errors",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/watch",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View file

@ -0,0 +1,352 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
import (
"errors"
"fmt"
"sync"
"time"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
v1core "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/core/v1"
"k8s.io/kubernetes/pkg/util/clock"
utilnode "k8s.io/kubernetes/pkg/util/node"
)
// FakeNodeHandler is a fake implementation of NodesInterface and NodeInterface. It
// allows test cases to have fine-grained control over mock behaviors. We also need
// PodsInterface and PodInterface to test list & delet pods, which is implemented in
// the embedded client.Fake field.
type FakeNodeHandler struct {
*fake.Clientset
// Input: Hooks determine if request is valid or not
CreateHook func(*FakeNodeHandler, *v1.Node) bool
Existing []*v1.Node
// Output
CreatedNodes []*v1.Node
DeletedNodes []*v1.Node
UpdatedNodes []*v1.Node
UpdatedNodeStatuses []*v1.Node
RequestCount int
// Synchronization
lock sync.Mutex
DeleteWaitChan chan struct{}
}
type FakeLegacyHandler struct {
v1core.CoreV1Interface
n *FakeNodeHandler
}
// GetUpdatedNodesCopy returns a slice of Nodes with updates applied.
func (c *FakeNodeHandler) GetUpdatedNodesCopy() []*v1.Node {
c.lock.Lock()
defer c.lock.Unlock()
updatedNodesCopy := make([]*v1.Node, len(c.UpdatedNodes), len(c.UpdatedNodes))
for i, ptr := range c.UpdatedNodes {
updatedNodesCopy[i] = ptr
}
return updatedNodesCopy
}
// Core returns fake CoreInterface.
func (c *FakeNodeHandler) Core() v1core.CoreV1Interface {
return &FakeLegacyHandler{c.Clientset.Core(), c}
}
// Nodes return fake NodeInterfaces.
func (m *FakeLegacyHandler) Nodes() v1core.NodeInterface {
return m.n
}
// Create adds a new Node to the fake store.
func (m *FakeNodeHandler) Create(node *v1.Node) (*v1.Node, error) {
m.lock.Lock()
defer func() {
m.RequestCount++
m.lock.Unlock()
}()
for _, n := range m.Existing {
if n.Name == node.Name {
return nil, apierrors.NewAlreadyExists(api.Resource("nodes"), node.Name)
}
}
if m.CreateHook == nil || m.CreateHook(m, node) {
nodeCopy := *node
m.CreatedNodes = append(m.CreatedNodes, &nodeCopy)
return node, nil
} else {
return nil, errors.New("Create error.")
}
}
// Get returns a Node from the fake store.
func (m *FakeNodeHandler) Get(name string, opts metav1.GetOptions) (*v1.Node, error) {
m.lock.Lock()
defer func() {
m.RequestCount++
m.lock.Unlock()
}()
for i := range m.UpdatedNodes {
if m.UpdatedNodes[i].Name == name {
nodeCopy := *m.UpdatedNodes[i]
return &nodeCopy, nil
}
}
for i := range m.Existing {
if m.Existing[i].Name == name {
nodeCopy := *m.Existing[i]
return &nodeCopy, nil
}
}
return nil, nil
}
// List returns a list of Nodes from the fake store.
func (m *FakeNodeHandler) List(opts v1.ListOptions) (*v1.NodeList, error) {
m.lock.Lock()
defer func() {
m.RequestCount++
m.lock.Unlock()
}()
var nodes []*v1.Node
for i := 0; i < len(m.UpdatedNodes); i++ {
if !contains(m.UpdatedNodes[i], m.DeletedNodes) {
nodes = append(nodes, m.UpdatedNodes[i])
}
}
for i := 0; i < len(m.Existing); i++ {
if !contains(m.Existing[i], m.DeletedNodes) && !contains(m.Existing[i], nodes) {
nodes = append(nodes, m.Existing[i])
}
}
for i := 0; i < len(m.CreatedNodes); i++ {
if !contains(m.CreatedNodes[i], m.DeletedNodes) && !contains(m.CreatedNodes[i], nodes) {
nodes = append(nodes, m.CreatedNodes[i])
}
}
nodeList := &v1.NodeList{}
for _, node := range nodes {
nodeList.Items = append(nodeList.Items, *node)
}
return nodeList, nil
}
// Delete delets a Node from the fake store.
func (m *FakeNodeHandler) Delete(id string, opt *v1.DeleteOptions) error {
m.lock.Lock()
defer func() {
m.RequestCount++
if m.DeleteWaitChan != nil {
m.DeleteWaitChan <- struct{}{}
}
m.lock.Unlock()
}()
m.DeletedNodes = append(m.DeletedNodes, NewNode(id))
return nil
}
// DeleteCollection deletes a collection of Nodes from the fake store.
func (m *FakeNodeHandler) DeleteCollection(opt *v1.DeleteOptions, listOpts v1.ListOptions) error {
return nil
}
// Update updates a Node in the fake store.
func (m *FakeNodeHandler) Update(node *v1.Node) (*v1.Node, error) {
m.lock.Lock()
defer func() {
m.RequestCount++
m.lock.Unlock()
}()
nodeCopy := *node
for i, updateNode := range m.UpdatedNodes {
if updateNode.Name == nodeCopy.Name {
m.UpdatedNodes[i] = &nodeCopy
return node, nil
}
}
m.UpdatedNodes = append(m.UpdatedNodes, &nodeCopy)
return node, nil
}
// UpdateStatus updates a status of a Node in the fake store.
func (m *FakeNodeHandler) UpdateStatus(node *v1.Node) (*v1.Node, error) {
m.lock.Lock()
defer func() {
m.RequestCount++
m.lock.Unlock()
}()
nodeCopy := *node
m.UpdatedNodeStatuses = append(m.UpdatedNodeStatuses, &nodeCopy)
return node, nil
}
// PatchStatus patches a status of a Node in the fake store.
func (m *FakeNodeHandler) PatchStatus(nodeName string, data []byte) (*v1.Node, error) {
m.RequestCount++
return &v1.Node{}, nil
}
// Watch watches Nodes in a fake store.
func (m *FakeNodeHandler) Watch(opts v1.ListOptions) (watch.Interface, error) {
return watch.NewFake(), nil
}
// Patch patches a Node in the fake store.
func (m *FakeNodeHandler) Patch(name string, pt api.PatchType, data []byte, subresources ...string) (*v1.Node, error) {
return nil, nil
}
// FakeRecorder is used as a fake during testing.
type FakeRecorder struct {
source v1.EventSource
Events []*v1.Event
clock clock.Clock
}
// Event emits a fake event to the fake recorder
func (f *FakeRecorder) Event(obj runtime.Object, eventtype, reason, message string) {
f.generateEvent(obj, metav1.Now(), eventtype, reason, message)
}
// Eventf emits a fake formatted event to the fake recorder
func (f *FakeRecorder) Eventf(obj runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) {
f.Event(obj, eventtype, reason, fmt.Sprintf(messageFmt, args...))
}
// PastEventf is a no-op
func (f *FakeRecorder) PastEventf(obj runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
}
func (f *FakeRecorder) generateEvent(obj runtime.Object, timestamp metav1.Time, eventtype, reason, message string) {
ref, err := v1.GetReference(obj)
if err != nil {
return
}
event := f.makeEvent(ref, eventtype, reason, message)
event.Source = f.source
if f.Events != nil {
fmt.Println("write event")
f.Events = append(f.Events, event)
}
}
func (f *FakeRecorder) makeEvent(ref *v1.ObjectReference, eventtype, reason, message string) *v1.Event {
fmt.Println("make event")
t := metav1.Time{Time: f.clock.Now()}
namespace := ref.Namespace
if namespace == "" {
namespace = v1.NamespaceDefault
}
return &v1.Event{
ObjectMeta: v1.ObjectMeta{
Name: fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
Namespace: namespace,
},
InvolvedObject: *ref,
Reason: reason,
Message: message,
FirstTimestamp: t,
LastTimestamp: t,
Count: 1,
Type: eventtype,
}
}
// NewFakeRecorder returns a pointer to a newly constructed FakeRecorder.
func NewFakeRecorder() *FakeRecorder {
return &FakeRecorder{
source: v1.EventSource{Component: "nodeControllerTest"},
Events: []*v1.Event{},
clock: clock.NewFakeClock(time.Now()),
}
}
// NewNode is a helper function for creating Nodes for testing.
func NewNode(name string) *v1.Node {
return &v1.Node{
ObjectMeta: v1.ObjectMeta{Name: name},
Spec: v1.NodeSpec{
ExternalID: name,
},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
},
},
}
}
// NewPod is a helper function for creating Pods for testing.
func NewPod(name, host string) *v1.Pod {
pod := &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
Name: name,
},
Spec: v1.PodSpec{
NodeName: host,
},
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
Status: v1.ConditionTrue,
},
},
},
}
return pod
}
func contains(node *v1.Node, nodes []*v1.Node) bool {
for i := 0; i < len(nodes); i++ {
if node.Name == nodes[i].Name {
return true
}
}
return false
}
// GetZones returns list of zones for all Nodes stored in FakeNodeHandler
func GetZones(nodeHandler *FakeNodeHandler) []string {
nodes, _ := nodeHandler.List(v1.ListOptions{})
zones := sets.NewString()
for _, node := range nodes.Items {
zones.Insert(utilnode.GetZoneKey(&node))
}
return zones.List()
}
// CreateZoneID returns a single zoneID for a given region and zone.
func CreateZoneID(region, zone string) string {
return region + ":\x00:" + zone
}