52baf68d50
Signed-off-by: Michał Żyłowski <michal.zylowski@intel.com>
824 lines
31 KiB
Go
824 lines
31 KiB
Go
/*
|
|
Copyright 2014 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package controller
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
"k8s.io/apimachinery/pkg/api/meta"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
|
"k8s.io/client-go/util/clock"
|
|
"k8s.io/client-go/util/integer"
|
|
"k8s.io/kubernetes/pkg/api"
|
|
"k8s.io/kubernetes/pkg/api/v1"
|
|
"k8s.io/kubernetes/pkg/api/validation"
|
|
extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
|
|
"k8s.io/kubernetes/pkg/client/cache"
|
|
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
|
|
"k8s.io/kubernetes/pkg/client/record"
|
|
)
|
|
|
|
const (
|
|
// If a watch drops a delete event for a pod, it'll take this long
|
|
// before a dormant controller waiting for those packets is woken up anyway. It is
|
|
// specifically targeted at the case where some problem prevents an update
|
|
// of expectations, without it the controller could stay asleep forever. This should
|
|
// be set based on the expected latency of watch events.
|
|
//
|
|
// Currently a controller can service (create *and* observe the watch events for said
|
|
// creation) about 10 pods a second, so it takes about 1 min to service
|
|
// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
|
|
// latency/pod at the scale of 3000 pods over 100 nodes.
|
|
ExpectationsTimeout = 5 * time.Minute
|
|
)
|
|
|
|
var (
|
|
KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
|
|
)
|
|
|
|
type ResyncPeriodFunc func() time.Duration
|
|
|
|
// Returns 0 for resyncPeriod in case resyncing is not needed.
|
|
func NoResyncPeriodFunc() time.Duration {
|
|
return 0
|
|
}
|
|
|
|
// StaticResyncPeriodFunc returns the resync period specified
|
|
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
|
|
return func() time.Duration {
|
|
return resyncPeriod
|
|
}
|
|
}
|
|
|
|
// Expectations are a way for controllers to tell the controller manager what they expect. eg:
|
|
// ControllerExpectations: {
|
|
// controller1: expects 2 adds in 2 minutes
|
|
// controller2: expects 2 dels in 2 minutes
|
|
// controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
|
|
// }
|
|
//
|
|
// Implementation:
|
|
// ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion
|
|
// ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller
|
|
//
|
|
// * Once set expectations can only be lowered
|
|
// * A controller isn't synced till its expectations are either fulfilled, or expire
|
|
// * Controllers that don't set expectations will get woken up for every matching controllee
|
|
|
|
// ExpKeyFunc to parse out the key from a ControlleeExpectation
|
|
var ExpKeyFunc = func(obj interface{}) (string, error) {
|
|
if e, ok := obj.(*ControlleeExpectations); ok {
|
|
return e.key, nil
|
|
}
|
|
return "", fmt.Errorf("Could not find key for obj %#v", obj)
|
|
}
|
|
|
|
// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
|
|
// Only abstracted out for testing.
|
|
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
|
|
// types of controllers, because the keys might conflict across types.
|
|
type ControllerExpectationsInterface interface {
|
|
GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
|
|
SatisfiedExpectations(controllerKey string) bool
|
|
DeleteExpectations(controllerKey string)
|
|
SetExpectations(controllerKey string, add, del int) error
|
|
ExpectCreations(controllerKey string, adds int) error
|
|
ExpectDeletions(controllerKey string, dels int) error
|
|
CreationObserved(controllerKey string)
|
|
DeletionObserved(controllerKey string)
|
|
RaiseExpectations(controllerKey string, add, del int)
|
|
LowerExpectations(controllerKey string, add, del int)
|
|
}
|
|
|
|
// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
|
|
type ControllerExpectations struct {
|
|
cache.Store
|
|
}
|
|
|
|
// GetExpectations returns the ControlleeExpectations of the given controller.
|
|
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
|
|
if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
|
|
return exp.(*ControlleeExpectations), true, nil
|
|
} else {
|
|
return nil, false, err
|
|
}
|
|
}
|
|
|
|
// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
|
|
func (r *ControllerExpectations) DeleteExpectations(controllerKey string) {
|
|
if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
|
|
if err := r.Delete(exp); err != nil {
|
|
glog.V(2).Infof("Error deleting expectations for controller %v: %v", controllerKey, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
|
|
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
|
|
// manager.
|
|
func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
|
|
if exp, exists, err := r.GetExpectations(controllerKey); exists {
|
|
if exp.Fulfilled() {
|
|
return true
|
|
} else if exp.isExpired() {
|
|
glog.V(4).Infof("Controller expectations expired %#v", exp)
|
|
return true
|
|
} else {
|
|
glog.V(4).Infof("Controller still waiting on expectations %#v", exp)
|
|
return false
|
|
}
|
|
} else if err != nil {
|
|
glog.V(2).Infof("Error encountered while checking expectations %#v, forcing sync", err)
|
|
} else {
|
|
// When a new controller is created, it doesn't have expectations.
|
|
// When it doesn't see expected watch events for > TTL, the expectations expire.
|
|
// - In this case it wakes up, creates/deletes controllees, and sets expectations again.
|
|
// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
|
|
// - In this case it continues without setting expectations till it needs to create/delete controllees.
|
|
glog.V(4).Infof("Controller %v either never recorded expectations, or the ttl expired.", controllerKey)
|
|
}
|
|
// Trigger a sync if we either encountered and error (which shouldn't happen since we're
|
|
// getting from local store) or this controller hasn't established expectations.
|
|
return true
|
|
}
|
|
|
|
// TODO: Extend ExpirationCache to support explicit expiration.
|
|
// TODO: Make this possible to disable in tests.
|
|
// TODO: Support injection of clock.
|
|
func (exp *ControlleeExpectations) isExpired() bool {
|
|
return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
|
|
}
|
|
|
|
// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
|
|
func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error {
|
|
exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
|
|
glog.V(4).Infof("Setting expectations %#v", exp)
|
|
return r.Add(exp)
|
|
}
|
|
|
|
func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error {
|
|
return r.SetExpectations(controllerKey, adds, 0)
|
|
}
|
|
|
|
func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error {
|
|
return r.SetExpectations(controllerKey, 0, dels)
|
|
}
|
|
|
|
// Decrements the expectation counts of the given controller.
|
|
func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) {
|
|
if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
|
|
exp.Add(int64(-add), int64(-del))
|
|
// The expectations might've been modified since the update on the previous line.
|
|
glog.V(4).Infof("Lowered expectations %#v", exp)
|
|
}
|
|
}
|
|
|
|
// Increments the expectation counts of the given controller.
|
|
func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) {
|
|
if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
|
|
exp.Add(int64(add), int64(del))
|
|
// The expectations might've been modified since the update on the previous line.
|
|
glog.V(4).Infof("Raised expectations %#v", exp)
|
|
}
|
|
}
|
|
|
|
// CreationObserved atomically decrements the `add` expectation count of the given controller.
|
|
func (r *ControllerExpectations) CreationObserved(controllerKey string) {
|
|
r.LowerExpectations(controllerKey, 1, 0)
|
|
}
|
|
|
|
// DeletionObserved atomically decrements the `del` expectation count of the given controller.
|
|
func (r *ControllerExpectations) DeletionObserved(controllerKey string) {
|
|
r.LowerExpectations(controllerKey, 0, 1)
|
|
}
|
|
|
|
// Expectations are either fulfilled, or expire naturally.
|
|
type Expectations interface {
|
|
Fulfilled() bool
|
|
}
|
|
|
|
// ControlleeExpectations track controllee creates/deletes.
|
|
type ControlleeExpectations struct {
|
|
// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
|
|
// See: https://golang.org/pkg/sync/atomic/ for more information
|
|
add int64
|
|
del int64
|
|
key string
|
|
timestamp time.Time
|
|
}
|
|
|
|
// Add increments the add and del counters.
|
|
func (e *ControlleeExpectations) Add(add, del int64) {
|
|
atomic.AddInt64(&e.add, add)
|
|
atomic.AddInt64(&e.del, del)
|
|
}
|
|
|
|
// Fulfilled returns true if this expectation has been fulfilled.
|
|
func (e *ControlleeExpectations) Fulfilled() bool {
|
|
// TODO: think about why this line being atomic doesn't matter
|
|
return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
|
|
}
|
|
|
|
// GetExpectations returns the add and del expectations of the controllee.
|
|
func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
|
|
return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
|
|
}
|
|
|
|
// NewControllerExpectations returns a store for ControllerExpectations.
|
|
func NewControllerExpectations() *ControllerExpectations {
|
|
return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
|
|
}
|
|
|
|
// UIDSetKeyFunc to parse out the key from a UIDSet.
|
|
var UIDSetKeyFunc = func(obj interface{}) (string, error) {
|
|
if u, ok := obj.(*UIDSet); ok {
|
|
return u.key, nil
|
|
}
|
|
return "", fmt.Errorf("Could not find key for obj %#v", obj)
|
|
}
|
|
|
|
// UIDSet holds a key and a set of UIDs. Used by the
|
|
// UIDTrackingControllerExpectations to remember which UID it has seen/still
|
|
// waiting for.
|
|
type UIDSet struct {
|
|
sets.String
|
|
key string
|
|
}
|
|
|
|
// UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
|
|
// This cache is needed over plain old expectations to safely handle graceful
|
|
// deletion. The desired behavior is to treat an update that sets the
|
|
// DeletionTimestamp on an object as a delete. To do so consistenly, one needs
|
|
// to remember the expected deletes so they aren't double counted.
|
|
// TODO: Track creates as well (#22599)
|
|
type UIDTrackingControllerExpectations struct {
|
|
ControllerExpectationsInterface
|
|
// TODO: There is a much nicer way to do this that involves a single store,
|
|
// a lock per entry, and a ControlleeExpectationsInterface type.
|
|
uidStoreLock sync.Mutex
|
|
// Store used for the UIDs associated with any expectation tracked via the
|
|
// ControllerExpectationsInterface.
|
|
uidStore cache.Store
|
|
}
|
|
|
|
// GetUIDs is a convenience method to avoid exposing the set of expected uids.
|
|
// The returned set is not thread safe, all modifications must be made holding
|
|
// the uidStoreLock.
|
|
func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
|
|
if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
|
|
return uid.(*UIDSet).String
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ExpectDeletions records expectations for the given deleteKeys, against the given controller.
|
|
func (u *UIDTrackingControllerExpectations) ExpectDeletions(rcKey string, deletedKeys []string) error {
|
|
u.uidStoreLock.Lock()
|
|
defer u.uidStoreLock.Unlock()
|
|
|
|
if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
|
|
glog.Errorf("Clobbering existing delete keys: %+v", existing)
|
|
}
|
|
expectedUIDs := sets.NewString()
|
|
for _, k := range deletedKeys {
|
|
expectedUIDs.Insert(k)
|
|
}
|
|
glog.V(4).Infof("Controller %v waiting on deletions for: %+v", rcKey, deletedKeys)
|
|
if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
|
|
return err
|
|
}
|
|
return u.ControllerExpectationsInterface.ExpectDeletions(rcKey, expectedUIDs.Len())
|
|
}
|
|
|
|
// DeletionObserved records the given deleteKey as a deletion, for the given rc.
|
|
func (u *UIDTrackingControllerExpectations) DeletionObserved(rcKey, deleteKey string) {
|
|
u.uidStoreLock.Lock()
|
|
defer u.uidStoreLock.Unlock()
|
|
|
|
uids := u.GetUIDs(rcKey)
|
|
if uids != nil && uids.Has(deleteKey) {
|
|
glog.V(4).Infof("Controller %v received delete for pod %v", rcKey, deleteKey)
|
|
u.ControllerExpectationsInterface.DeletionObserved(rcKey)
|
|
uids.Delete(deleteKey)
|
|
}
|
|
}
|
|
|
|
// DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
|
|
// underlying ControllerExpectationsInterface.
|
|
func (u *UIDTrackingControllerExpectations) DeleteExpectations(rcKey string) {
|
|
u.uidStoreLock.Lock()
|
|
defer u.uidStoreLock.Unlock()
|
|
|
|
u.ControllerExpectationsInterface.DeleteExpectations(rcKey)
|
|
if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
|
|
if err := u.uidStore.Delete(uidExp); err != nil {
|
|
glog.V(2).Infof("Error deleting uid expectations for controller %v: %v", rcKey, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// NewUIDTrackingControllerExpectations returns a wrapper around
|
|
// ControllerExpectations that is aware of deleteKeys.
|
|
func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
|
|
return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
|
|
}
|
|
|
|
// Reasons for pod events
|
|
const (
|
|
// FailedCreatePodReason is added in an event and in a replica set condition
|
|
// when a pod for a replica set is failed to be created.
|
|
FailedCreatePodReason = "FailedCreate"
|
|
// SuccessfulCreatePodReason is added in an event when a pod for a replica set
|
|
// is successfully created.
|
|
SuccessfulCreatePodReason = "SuccessfulCreate"
|
|
// FailedDeletePodReason is added in an event and in a replica set condition
|
|
// when a pod for a replica set is failed to be deleted.
|
|
FailedDeletePodReason = "FailedDelete"
|
|
// SuccessfulDeletePodReason is added in an event when a pod for a replica set
|
|
// is successfully deleted.
|
|
SuccessfulDeletePodReason = "SuccessfulDelete"
|
|
)
|
|
|
|
// RSControlInterface is an interface that knows how to add or delete
|
|
// ReplicaSets, as well as increment or decrement them. It is used
|
|
// by the deployment controller to ease testing of actions that it takes.
|
|
type RSControlInterface interface {
|
|
PatchReplicaSet(namespace, name string, data []byte) error
|
|
}
|
|
|
|
// RealRSControl is the default implementation of RSControllerInterface.
|
|
type RealRSControl struct {
|
|
KubeClient clientset.Interface
|
|
Recorder record.EventRecorder
|
|
}
|
|
|
|
var _ RSControlInterface = &RealRSControl{}
|
|
|
|
func (r RealRSControl) PatchReplicaSet(namespace, name string, data []byte) error {
|
|
_, err := r.KubeClient.Extensions().ReplicaSets(namespace).Patch(name, types.StrategicMergePatchType, data)
|
|
return err
|
|
}
|
|
|
|
// PodControlInterface is an interface that knows how to add or delete pods
|
|
// created as an interface to allow testing.
|
|
type PodControlInterface interface {
|
|
// CreatePods creates new pods according to the spec.
|
|
CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error
|
|
// CreatePodsOnNode creates a new pod according to the spec on the specified node.
|
|
CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object) error
|
|
// CreatePodsWithControllerRef creates new pods according to the spec, and sets object as the pod's controller.
|
|
CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
|
|
// DeletePod deletes the pod identified by podID.
|
|
DeletePod(namespace string, podID string, object runtime.Object) error
|
|
// PatchPod patches the pod.
|
|
PatchPod(namespace, name string, data []byte) error
|
|
}
|
|
|
|
// RealPodControl is the default implementation of PodControlInterface.
|
|
type RealPodControl struct {
|
|
KubeClient clientset.Interface
|
|
Recorder record.EventRecorder
|
|
}
|
|
|
|
var _ PodControlInterface = &RealPodControl{}
|
|
|
|
func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
|
|
desiredLabels := make(labels.Set)
|
|
for k, v := range template.Labels {
|
|
desiredLabels[k] = v
|
|
}
|
|
return desiredLabels
|
|
}
|
|
|
|
func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
|
|
desiredFinalizers := make([]string, len(template.Finalizers))
|
|
copy(desiredFinalizers, template.Finalizers)
|
|
return desiredFinalizers
|
|
}
|
|
|
|
func getPodsAnnotationSet(template *v1.PodTemplateSpec, object runtime.Object) (labels.Set, error) {
|
|
desiredAnnotations := make(labels.Set)
|
|
for k, v := range template.Annotations {
|
|
desiredAnnotations[k] = v
|
|
}
|
|
createdByRef, err := v1.GetReference(object)
|
|
if err != nil {
|
|
return desiredAnnotations, fmt.Errorf("unable to get controller reference: %v", err)
|
|
}
|
|
|
|
// TODO: this code was not safe previously - as soon as new code came along that switched to v2, old clients
|
|
// would be broken upon reading it. This is explicitly hardcoded to v1 to guarantee predictable deployment.
|
|
// We need to consistently handle this case of annotation versioning.
|
|
codec := api.Codecs.LegacyCodec(schema.GroupVersion{Group: v1.GroupName, Version: "v1"})
|
|
|
|
createdByRefJson, err := runtime.Encode(codec, &v1.SerializedReference{
|
|
Reference: *createdByRef,
|
|
})
|
|
if err != nil {
|
|
return desiredAnnotations, fmt.Errorf("unable to serialize controller reference: %v", err)
|
|
}
|
|
desiredAnnotations[v1.CreatedByAnnotation] = string(createdByRefJson)
|
|
return desiredAnnotations, nil
|
|
}
|
|
|
|
func getPodsPrefix(controllerName string) string {
|
|
// use the dash (if the name isn't too long) to make the pod name a bit prettier
|
|
prefix := fmt.Sprintf("%s-", controllerName)
|
|
if len(validation.ValidatePodName(prefix, true)) != 0 {
|
|
prefix = controllerName
|
|
}
|
|
return prefix
|
|
}
|
|
|
|
func (r RealPodControl) CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error {
|
|
return r.createPods("", namespace, template, object, nil)
|
|
}
|
|
|
|
func (r RealPodControl) CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
|
|
if controllerRef == nil {
|
|
return fmt.Errorf("controllerRef is nil")
|
|
}
|
|
if len(controllerRef.APIVersion) == 0 {
|
|
return fmt.Errorf("controllerRef has empty APIVersion")
|
|
}
|
|
if len(controllerRef.Kind) == 0 {
|
|
return fmt.Errorf("controllerRef has empty Kind")
|
|
}
|
|
if controllerRef.Controller == nil || *controllerRef.Controller != true {
|
|
return fmt.Errorf("controllerRef.Controller is not set")
|
|
}
|
|
return r.createPods("", namespace, template, controllerObject, controllerRef)
|
|
}
|
|
|
|
func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object) error {
|
|
return r.createPods(nodeName, namespace, template, object, nil)
|
|
}
|
|
|
|
func (r RealPodControl) PatchPod(namespace, name string, data []byte) error {
|
|
_, err := r.KubeClient.Core().Pods(namespace).Patch(name, types.StrategicMergePatchType, data)
|
|
return err
|
|
}
|
|
|
|
func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
|
|
desiredLabels := getPodsLabelSet(template)
|
|
desiredFinalizers := getPodsFinalizers(template)
|
|
desiredAnnotations, err := getPodsAnnotationSet(template, parentObject)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
accessor, err := meta.Accessor(parentObject)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
|
|
}
|
|
prefix := getPodsPrefix(accessor.GetName())
|
|
|
|
pod := &v1.Pod{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Labels: desiredLabels,
|
|
Annotations: desiredAnnotations,
|
|
GenerateName: prefix,
|
|
Finalizers: desiredFinalizers,
|
|
},
|
|
}
|
|
if controllerRef != nil {
|
|
pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
|
|
}
|
|
clone, err := api.Scheme.DeepCopy(&template.Spec)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pod.Spec = *clone.(*v1.PodSpec)
|
|
return pod, nil
|
|
}
|
|
|
|
func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
|
|
pod, err := GetPodFromTemplate(template, object, controllerRef)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(nodeName) != 0 {
|
|
pod.Spec.NodeName = nodeName
|
|
}
|
|
if labels.Set(pod.Labels).AsSelectorPreValidated().Empty() {
|
|
return fmt.Errorf("unable to create pods, no labels")
|
|
}
|
|
if newPod, err := r.KubeClient.Core().Pods(namespace).Create(pod); err != nil {
|
|
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
|
|
return fmt.Errorf("unable to create pods: %v", err)
|
|
} else {
|
|
accessor, err := meta.Accessor(object)
|
|
if err != nil {
|
|
glog.Errorf("parentObject does not have ObjectMeta, %v", err)
|
|
return nil
|
|
}
|
|
glog.V(4).Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name)
|
|
r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r RealPodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
|
|
accessor, err := meta.Accessor(object)
|
|
if err != nil {
|
|
return fmt.Errorf("object does not have ObjectMeta, %v", err)
|
|
}
|
|
glog.V(2).Infof("Controller %v deleting pod %v/%v", accessor.GetName(), namespace, podID)
|
|
if err := r.KubeClient.Core().Pods(namespace).Delete(podID, nil); err != nil {
|
|
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
|
|
return fmt.Errorf("unable to delete pods: %v", err)
|
|
} else {
|
|
r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
type FakePodControl struct {
|
|
sync.Mutex
|
|
Templates []v1.PodTemplateSpec
|
|
ControllerRefs []metav1.OwnerReference
|
|
DeletePodName []string
|
|
Patches [][]byte
|
|
Err error
|
|
}
|
|
|
|
var _ PodControlInterface = &FakePodControl{}
|
|
|
|
func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.Patches = append(f.Patches, data)
|
|
if f.Err != nil {
|
|
return f.Err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec, object runtime.Object) error {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.Templates = append(f.Templates, *spec)
|
|
if f.Err != nil {
|
|
return f.Err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.Templates = append(f.Templates, *spec)
|
|
f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
|
|
if f.Err != nil {
|
|
return f.Err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object) error {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.Templates = append(f.Templates, *template)
|
|
if f.Err != nil {
|
|
return f.Err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *FakePodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.DeletePodName = append(f.DeletePodName, podID)
|
|
if f.Err != nil {
|
|
return f.Err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *FakePodControl) Clear() {
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
f.DeletePodName = []string{}
|
|
f.Templates = []v1.PodTemplateSpec{}
|
|
f.ControllerRefs = []metav1.OwnerReference{}
|
|
f.Patches = [][]byte{}
|
|
}
|
|
|
|
// ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
|
|
type ByLogging []*v1.Pod
|
|
|
|
func (s ByLogging) Len() int { return len(s) }
|
|
func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
|
|
func (s ByLogging) Less(i, j int) bool {
|
|
// 1. assigned < unassigned
|
|
if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
|
|
return len(s[i].Spec.NodeName) > 0
|
|
}
|
|
// 2. PodRunning < PodUnknown < PodPending
|
|
m := map[v1.PodPhase]int{v1.PodRunning: 0, v1.PodUnknown: 1, v1.PodPending: 2}
|
|
if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
|
|
return m[s[i].Status.Phase] < m[s[j].Status.Phase]
|
|
}
|
|
// 3. ready < not ready
|
|
if v1.IsPodReady(s[i]) != v1.IsPodReady(s[j]) {
|
|
return v1.IsPodReady(s[i])
|
|
}
|
|
// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
|
|
// see https://github.com/kubernetes/kubernetes/issues/22065
|
|
// 4. Been ready for more time < less time < empty time
|
|
if v1.IsPodReady(s[i]) && v1.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) {
|
|
return afterOrZero(podReadyTime(s[j]), podReadyTime(s[i]))
|
|
}
|
|
// 5. Pods with containers with higher restart counts < lower restart counts
|
|
if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
|
|
return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
|
|
}
|
|
// 6. older pods < newer pods < empty timestamp pods
|
|
if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) {
|
|
return afterOrZero(s[j].CreationTimestamp, s[i].CreationTimestamp)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
|
|
type ActivePods []*v1.Pod
|
|
|
|
func (s ActivePods) Len() int { return len(s) }
|
|
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
|
|
func (s ActivePods) Less(i, j int) bool {
|
|
// 1. Unassigned < assigned
|
|
// If only one of the pods is unassigned, the unassigned one is smaller
|
|
if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
|
|
return len(s[i].Spec.NodeName) == 0
|
|
}
|
|
// 2. PodPending < PodUnknown < PodRunning
|
|
m := map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
|
|
if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
|
|
return m[s[i].Status.Phase] < m[s[j].Status.Phase]
|
|
}
|
|
// 3. Not ready < ready
|
|
// If only one of the pods is not ready, the not ready one is smaller
|
|
if v1.IsPodReady(s[i]) != v1.IsPodReady(s[j]) {
|
|
return !v1.IsPodReady(s[i])
|
|
}
|
|
// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
|
|
// see https://github.com/kubernetes/kubernetes/issues/22065
|
|
// 4. Been ready for empty time < less time < more time
|
|
// If both pods are ready, the latest ready one is smaller
|
|
if v1.IsPodReady(s[i]) && v1.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) {
|
|
return afterOrZero(podReadyTime(s[i]), podReadyTime(s[j]))
|
|
}
|
|
// 5. Pods with containers with higher restart counts < lower restart counts
|
|
if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
|
|
return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
|
|
}
|
|
// 6. Empty creation time pods < newer pods < older pods
|
|
if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) {
|
|
return afterOrZero(s[i].CreationTimestamp, s[j].CreationTimestamp)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// afterOrZero checks if time t1 is after time t2; if one of them
|
|
// is zero, the zero time is seen as after non-zero time.
|
|
func afterOrZero(t1, t2 metav1.Time) bool {
|
|
if t1.Time.IsZero() || t2.Time.IsZero() {
|
|
return t1.Time.IsZero()
|
|
}
|
|
return t1.After(t2.Time)
|
|
}
|
|
|
|
func podReadyTime(pod *v1.Pod) metav1.Time {
|
|
if v1.IsPodReady(pod) {
|
|
for _, c := range pod.Status.Conditions {
|
|
// we only care about pod ready conditions
|
|
if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
|
|
return c.LastTransitionTime
|
|
}
|
|
}
|
|
}
|
|
return metav1.Time{}
|
|
}
|
|
|
|
func maxContainerRestarts(pod *v1.Pod) int {
|
|
maxRestarts := 0
|
|
for _, c := range pod.Status.ContainerStatuses {
|
|
maxRestarts = integer.IntMax(maxRestarts, int(c.RestartCount))
|
|
}
|
|
return maxRestarts
|
|
}
|
|
|
|
// FilterActivePods returns pods that have not terminated.
|
|
func FilterActivePods(pods []*v1.Pod) []*v1.Pod {
|
|
var result []*v1.Pod
|
|
for _, p := range pods {
|
|
if IsPodActive(p) {
|
|
result = append(result, p)
|
|
} else {
|
|
glog.V(4).Infof("Ignoring inactive pod %v/%v in state %v, deletion time %v",
|
|
p.Namespace, p.Name, p.Status.Phase, p.DeletionTimestamp)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func IsPodActive(p *v1.Pod) bool {
|
|
return v1.PodSucceeded != p.Status.Phase &&
|
|
v1.PodFailed != p.Status.Phase &&
|
|
p.DeletionTimestamp == nil
|
|
}
|
|
|
|
// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
|
|
func FilterActiveReplicaSets(replicaSets []*extensions.ReplicaSet) []*extensions.ReplicaSet {
|
|
active := []*extensions.ReplicaSet{}
|
|
for i := range replicaSets {
|
|
rs := replicaSets[i]
|
|
|
|
if rs != nil && *(rs.Spec.Replicas) > 0 {
|
|
active = append(active, replicaSets[i])
|
|
}
|
|
}
|
|
return active
|
|
}
|
|
|
|
// PodKey returns a key unique to the given pod within a cluster.
|
|
// It's used so we consistently use the same key scheme in this module.
|
|
// It does exactly what cache.MetaNamespaceKeyFunc would have done
|
|
// except there's not possibility for error since we know the exact type.
|
|
func PodKey(pod *v1.Pod) string {
|
|
return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
|
|
}
|
|
|
|
// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
|
|
type ControllersByCreationTimestamp []*v1.ReplicationController
|
|
|
|
func (o ControllersByCreationTimestamp) Len() int { return len(o) }
|
|
func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
|
|
func (o ControllersByCreationTimestamp) Less(i, j int) bool {
|
|
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
|
|
return o[i].Name < o[j].Name
|
|
}
|
|
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
|
|
}
|
|
|
|
// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
|
|
type ReplicaSetsByCreationTimestamp []*extensions.ReplicaSet
|
|
|
|
func (o ReplicaSetsByCreationTimestamp) Len() int { return len(o) }
|
|
func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
|
|
func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
|
|
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
|
|
return o[i].Name < o[j].Name
|
|
}
|
|
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
|
|
}
|
|
|
|
// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
|
|
// By using the creation timestamp, this sorts from old to new replica sets.
|
|
type ReplicaSetsBySizeOlder []*extensions.ReplicaSet
|
|
|
|
func (o ReplicaSetsBySizeOlder) Len() int { return len(o) }
|
|
func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
|
|
func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
|
|
if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
|
|
return ReplicaSetsByCreationTimestamp(o).Less(i, j)
|
|
}
|
|
return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
|
|
}
|
|
|
|
// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
|
|
// By using the creation timestamp, this sorts from new to old replica sets.
|
|
type ReplicaSetsBySizeNewer []*extensions.ReplicaSet
|
|
|
|
func (o ReplicaSetsBySizeNewer) Len() int { return len(o) }
|
|
func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
|
|
func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
|
|
if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
|
|
return ReplicaSetsByCreationTimestamp(o).Less(j, i)
|
|
}
|
|
return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
|
|
}
|