Vendor: Update k8s version

Signed-off-by: Michał Żyłowski <michal.zylowski@intel.com>
Michał Żyłowski 2017-02-03 14:41:32 +01:00
parent dfa93414c5
commit 52baf68d50
3756 changed files with 113013 additions and 92675 deletions


@@ -0,0 +1,93 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"fakes.go",
"identity_mappers.go",
"iterator.go",
"stateful_set.go",
"stateful_set_utils.go",
"statefulpod.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/apps/v1beta1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/clientset:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/apps/v1beta1:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/core/v1:go_default_library",
"//pkg/client/legacylisters:go_default_library",
"//pkg/client/record:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//vendor:github.com/golang/glog",
"//vendor:gopkg.in/inf.v0",
"//vendor:k8s.io/apimachinery/pkg/api/errors",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/runtime",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/errors",
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
"//vendor:k8s.io/apimachinery/pkg/watch",
],
)
go_test(
name = "go_default_test",
srcs = [
"identity_mappers_test.go",
"iterator_test.go",
"stateful_set_test.go",
"statefulpod_test.go",
],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/testapi:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/apps/v1beta1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/clientset:go_default_library",
"//pkg/client/clientset_generated/clientset/fake:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/apps/v1beta1:go_default_library",
"//pkg/client/clientset_generated/clientset/typed/apps/v1beta1/fake:go_default_library",
"//pkg/client/legacylisters:go_default_library",
"//pkg/client/testing/core:go_default_library",
"//pkg/controller:go_default_library",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/errors",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/client-go/rest",
"//vendor:k8s.io/client-go/util/testing",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

vendor/k8s.io/kubernetes/pkg/controller/statefulset/OWNERS generated vendored Executable file

@@ -0,0 +1,6 @@
reviewers:
- foxish
- bprashanth
- smarterclayton
- janetkuo
- kargakis


@@ -0,0 +1,327 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"time"
inf "gopkg.in/inf.v0"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
apipod "k8s.io/kubernetes/pkg/api/v1/pod"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/record"
)
func dec(i int64, exponent int) *inf.Dec {
return inf.NewDec(i, inf.Scale(-exponent))
}
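// Note (not from the upstream file): inf.Dec represents unscaled * 10^(-scale),
// so dec(i, exponent) evaluates to i * 10^exponent; for example dec(5, 3) is
// 5000 and dec(5, -3) is 0.005.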
func newPVC(name string) v1.PersistentVolumeClaim {
return v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Spec: v1.PersistentVolumeClaimSpec{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceStorage: *resource.NewQuantity(1, resource.BinarySI),
},
},
},
}
}
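// Note (not from the upstream file): resource.NewQuantity(1, resource.BinarySI)
// requests a nominal 1-byte volume; these fake claims only need a stable name
// and spec so the tests below can compare them.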
func newStatefulSetWithVolumes(replicas int, name string, petMounts []v1.VolumeMount, podMounts []v1.VolumeMount) *apps.StatefulSet {
mounts := append(petMounts, podMounts...)
claims := []v1.PersistentVolumeClaim{}
for _, m := range petMounts {
claims = append(claims, newPVC(m.Name))
}
vols := []v1.Volume{}
for _, m := range podMounts {
vols = append(vols, v1.Volume{
Name: m.Name,
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{
Path: fmt.Sprintf("/tmp/%v", m.Name),
},
},
})
}
return &apps.StatefulSet{
TypeMeta: metav1.TypeMeta{
Kind: "StatefulSet",
APIVersion: "apps/v1beta1",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: metav1.NamespaceDefault,
UID: types.UID("test"),
},
Spec: apps.StatefulSetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{"foo": "bar"},
},
Replicas: func() *int32 { i := int32(replicas); return &i }(),
Template: v1.PodTemplateSpec{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "nginx",
Image: "nginx",
VolumeMounts: mounts,
},
},
Volumes: vols,
},
},
VolumeClaimTemplates: claims,
ServiceName: "governingsvc",
},
}
}
func runningPod(ns, name string) *v1.Pod {
p := &v1.Pod{Status: v1.PodStatus{Phase: v1.PodRunning}}
p.Namespace = ns
p.Name = name
return p
}
func newPodList(ps *apps.StatefulSet, num int) []*v1.Pod {
// knownPods are pods in the system
knownPods := []*v1.Pod{}
for i := 0; i < num; i++ {
k, _ := newPCB(fmt.Sprintf("%v", i), ps)
knownPods = append(knownPods, k.pod)
}
return knownPods
}
func newStatefulSet(replicas int) *apps.StatefulSet {
petMounts := []v1.VolumeMount{
{Name: "datadir", MountPath: "/tmp/zookeeper"},
}
podMounts := []v1.VolumeMount{
{Name: "home", MountPath: "/home"},
}
return newStatefulSetWithVolumes(replicas, "foo", petMounts, podMounts)
}
func checkPodForMount(pod *v1.Pod, mountName string) error {
for _, c := range pod.Spec.Containers {
for _, v := range c.VolumeMounts {
if v.Name == mountName {
return nil
}
}
}
return fmt.Errorf("Found volume but no associated mount %v in pod %v", mountName, pod.Name)
}
func newFakePetClient() *fakePetClient {
return &fakePetClient{
pets: []*pcb{},
claims: []v1.PersistentVolumeClaim{},
recorder: &record.FakeRecorder{},
petHealthChecker: &defaultPetHealthChecker{},
}
}
type fakePetClient struct {
pets []*pcb
claims []v1.PersistentVolumeClaim
petsCreated int
petsDeleted int
claimsCreated int
claimsDeleted int
recorder record.EventRecorder
petHealthChecker
}
// Delete fakes pet client deletion.
func (f *fakePetClient) Delete(p *pcb) error {
pets := []*pcb{}
found := false
for i, pet := range f.pets {
if p.pod.Name == pet.pod.Name {
found = true
f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulDelete", "pod: %v", pet.pod.Name)
continue
}
pets = append(pets, f.pets[i])
}
if !found {
// TODO: Return proper not found error
return fmt.Errorf("Delete failed: pod %v doesn't exist", p.pod.Name)
}
f.pets = pets
f.petsDeleted++
return nil
}
// Get fakes getting pets.
func (f *fakePetClient) Get(p *pcb) (*pcb, bool, error) {
for i, pet := range f.pets {
if p.pod.Name == pet.pod.Name {
return f.pets[i], true, nil
}
}
return nil, false, nil
}
// Create fakes pet creation.
func (f *fakePetClient) Create(p *pcb) error {
for _, pet := range f.pets {
if p.pod.Name == pet.pod.Name {
return fmt.Errorf("Create failed: pod %v already exists", p.pod.Name)
}
}
f.recorder.Eventf(p.parent, v1.EventTypeNormal, "SuccessfulCreate", "pod: %v", p.pod.Name)
f.pets = append(f.pets, p)
f.petsCreated++
return nil
}
// Update fakes pet updates.
func (f *fakePetClient) Update(expected, wanted *pcb) error {
found := false
pets := []*pcb{}
for i, pet := range f.pets {
if wanted.pod.Name == pet.pod.Name {
f.pets[i].pod.Annotations[apipod.PodHostnameAnnotation] = wanted.pod.Annotations[apipod.PodHostnameAnnotation]
f.pets[i].pod.Annotations[apipod.PodSubdomainAnnotation] = wanted.pod.Annotations[apipod.PodSubdomainAnnotation]
f.pets[i].pod.Spec = wanted.pod.Spec
found = true
}
pets = append(pets, f.pets[i])
}
f.pets = pets
if !found {
return fmt.Errorf("Cannot update pod %v: not found", wanted.pod.Name)
}
// TODO: Delete pvcs/volumes that are in wanted but not in expected.
return nil
}
func (f *fakePetClient) getPodList() []*v1.Pod {
p := []*v1.Pod{}
for i, pet := range f.pets {
if pet.pod == nil {
continue
}
p = append(p, f.pets[i].pod)
}
return p
}
func (f *fakePetClient) deletePetAtIndex(index int) {
p := []*pcb{}
for i := range f.pets {
if i != index {
p = append(p, f.pets[i])
}
}
f.pets = p
}
func (f *fakePetClient) setHealthy(index int) error {
if len(f.pets) <= index {
return fmt.Errorf("Index out of range, len %v index %v", len(f.pets), index)
}
f.pets[index].pod.Status.Phase = v1.PodRunning
f.pets[index].pod.Annotations[StatefulSetInitAnnotation] = "true"
f.pets[index].pod.Status.Conditions = []v1.PodCondition{
{Type: v1.PodReady, Status: v1.ConditionTrue},
}
return nil
}
// isHealthy is a convenience wrapper around the embedded petHealthChecker;
// it reports whether that checker considers the given pod healthy.
func (f *fakePetClient) isHealthy(pod *v1.Pod) bool {
if f.petHealthChecker.isHealthy(pod) {
return true
}
return false
}
func (f *fakePetClient) setDeletionTimestamp(index int) error {
if len(f.pets) <= index {
return fmt.Errorf("Index out of range, len %v index %v", len(f.pets), index)
}
f.pets[index].pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return nil
}
// SyncPVCs fakes pvc syncing.
func (f *fakePetClient) SyncPVCs(pet *pcb) error {
v := pet.pvcs
updateClaims := map[string]v1.PersistentVolumeClaim{}
for i, update := range v {
updateClaims[update.Name] = v[i]
}
claimList := []v1.PersistentVolumeClaim{}
for i, existing := range f.claims {
if update, ok := updateClaims[existing.Name]; ok {
claimList = append(claimList, update)
delete(updateClaims, existing.Name)
} else {
claimList = append(claimList, f.claims[i])
}
}
for _, remaining := range updateClaims {
claimList = append(claimList, remaining)
f.claimsCreated++
f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulCreate", "pvc: %v", remaining.Name)
}
f.claims = claimList
return nil
}
// DeletePVCs fakes pvc deletion.
func (f *fakePetClient) DeletePVCs(pet *pcb) error {
claimsToDelete := pet.pvcs
deleteClaimNames := sets.NewString()
for _, c := range claimsToDelete {
deleteClaimNames.Insert(c.Name)
}
pvcs := []v1.PersistentVolumeClaim{}
for i, existing := range f.claims {
if deleteClaimNames.Has(existing.Name) {
deleteClaimNames.Delete(existing.Name)
f.claimsDeleted++
f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulDelete", "pvc: %v", existing.Name)
continue
}
pvcs = append(pvcs, f.claims[i])
}
f.claims = pvcs
if deleteClaimNames.Len() != 0 {
return fmt.Errorf("Claims %+v don't exist. Failed deletion.", deleteClaimNames)
}
return nil
}


@@ -0,0 +1,247 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"crypto/md5"
"fmt"
"sort"
"strings"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/v1"
podapi "k8s.io/kubernetes/pkg/api/v1/pod"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
)
// identityMapper is an interface for assigning identities to a pet.
// All existing identity mappers just append "-(index)" to the statefulset name to
// generate a unique identity. This is used in claims/DNS/hostname/petname
// etc. There's a more elegant way to achieve this mapping, but we're
// taking the simplest route till we have data on whether users will need
// more customization.
// Note that running a single identity mapper is not guaranteed to give
// your pet a unique identity. You must run them all. Order doesn't matter.
type identityMapper interface {
// SetIdentity takes an id and assigns the given pet an identity based
on the stateful set spec. The id must be unique amongst members of the
// stateful set.
SetIdentity(id string, pet *v1.Pod)
// Identity returns the identity of the pet.
Identity(pod *v1.Pod) string
}
func newIdentityMappers(ps *apps.StatefulSet) []identityMapper {
return []identityMapper{
&NameIdentityMapper{ps},
&NetworkIdentityMapper{ps},
&VolumeIdentityMapper{ps},
}
}
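// Illustrative sketch (not from the upstream file): callers are expected to run
// every mapper, as newPCB in iterator.go does, so a pod picks up its name,
// network and volume identity together:
//
//	pod, _ := controller.GetPodFromTemplate(&ps.Spec.Template, ps, nil)
//	for _, m := range newIdentityMappers(ps) {
//		m.SetIdentity("0", pod) // identity for ordinal "0"
//	}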
// NetworkIdentityMapper assigns network identity to pets.
type NetworkIdentityMapper struct {
ps *apps.StatefulSet
}
// SetIdentity sets network identity on the pet.
func (n *NetworkIdentityMapper) SetIdentity(id string, pet *v1.Pod) {
pet.Annotations[podapi.PodHostnameAnnotation] = fmt.Sprintf("%v-%v", n.ps.Name, id)
pet.Annotations[podapi.PodSubdomainAnnotation] = n.ps.Spec.ServiceName
return
}
// Identity returns the network identity of the pet.
func (n *NetworkIdentityMapper) Identity(pet *v1.Pod) string {
return n.String(pet)
}
// String is a string function for the network identity of the pet.
func (n *NetworkIdentityMapper) String(pet *v1.Pod) string {
hostname := pet.Annotations[podapi.PodHostnameAnnotation]
subdomain := pet.Annotations[podapi.PodSubdomainAnnotation]
return strings.Join([]string{hostname, subdomain, n.ps.Namespace}, ".")
}
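// Note (not from the upstream file): the resulting identity is the DNS-style
// string "<set name>-<id>.<service name>.<namespace>", e.g.
// "foo-0.governingsvc.default" with the fixtures used by the tests in this package.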
// VolumeIdentityMapper assigns storage identity to pets.
type VolumeIdentityMapper struct {
ps *apps.StatefulSet
}
// SetIdentity sets storage identity on the pet.
func (v *VolumeIdentityMapper) SetIdentity(id string, pet *v1.Pod) {
petVolumes := []v1.Volume{}
petClaims := v.GetClaims(id)
// These volumes will all go down with the pod. If a name matches one of
// the claims in the stateful set, it gets clobbered.
podVolumes := map[string]v1.Volume{}
for _, podVol := range pet.Spec.Volumes {
podVolumes[podVol.Name] = podVol
}
// Insert claims for the idempotent statefulset volumes
for name, claim := range petClaims {
// Volumes on a pet for which there are no associated claims on the
// statefulset are pod local, and die with the pod.
podVol, ok := podVolumes[name]
if ok {
// TODO: Validate and reject this.
glog.V(4).Infof("Overwriting existing volume source %v", podVol.Name)
}
newVol := v1.Volume{
Name: name,
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: claim.Name,
// TODO: Use source definition to set this value when we have one.
ReadOnly: false,
},
},
}
petVolumes = append(petVolumes, newVol)
}
// Transfer any ephemeral pod volumes
for name, vol := range podVolumes {
if _, ok := petClaims[name]; !ok {
petVolumes = append(petVolumes, vol)
}
}
pet.Spec.Volumes = petVolumes
return
}
// Identity returns the storage identity of the pet.
func (v *VolumeIdentityMapper) Identity(pet *v1.Pod) string {
// TODO: Make this a hash?
return v.String(pet)
}
// String is a string function for the storage identity of the pet.
func (v *VolumeIdentityMapper) String(pet *v1.Pod) string {
ids := []string{}
petVols := sets.NewString()
for _, petVol := range v.ps.Spec.VolumeClaimTemplates {
petVols.Insert(petVol.Name)
}
for _, podVol := range pet.Spec.Volumes {
// Volumes on a pet for which there are no associated claims on the
// statefulset are pod local, and die with the pod.
if !petVols.Has(podVol.Name) {
continue
}
if podVol.VolumeSource.PersistentVolumeClaim == nil {
// TODO: Is this a part of the identity?
ids = append(ids, fmt.Sprintf("%v:None", podVol.Name))
continue
}
ids = append(ids, fmt.Sprintf("%v:%v", podVol.Name, podVol.VolumeSource.PersistentVolumeClaim.ClaimName))
}
sort.Strings(ids)
return strings.Join(ids, "")
}
// GetClaims returns the volume claims associated with the given id.
// The claims belong to the statefulset. The id should be unique within a statefulset.
func (v *VolumeIdentityMapper) GetClaims(id string) map[string]v1.PersistentVolumeClaim {
petClaims := map[string]v1.PersistentVolumeClaim{}
for _, pvc := range v.ps.Spec.VolumeClaimTemplates {
claim := pvc
// TODO: Name length checking in validation.
claim.Name = fmt.Sprintf("%v-%v-%v", claim.Name, v.ps.Name, id)
claim.Namespace = v.ps.Namespace
claim.Labels = v.ps.Spec.Selector.MatchLabels
// TODO: We're assuming that the claim template has a volume QoS key, eg:
// volume.alpha.kubernetes.io/storage-class: anything
petClaims[pvc.Name] = claim
}
return petClaims
}
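// Note (not from the upstream file): with the test fixtures in this package, a
// claim template named "datadir" on StatefulSet "foo" yields the claim name
// "datadir-foo-0" for id "0", which is exactly the naming the tests assert on.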
// GetClaimsForPet returns the pvcs for the given pet.
func (v *VolumeIdentityMapper) GetClaimsForPet(pet *v1.Pod) []v1.PersistentVolumeClaim {
// Strip out the "-(index)" from the pet name and use it to generate
// claim names.
id := strings.Split(pet.Name, "-")
petID := id[len(id)-1]
pvcs := []v1.PersistentVolumeClaim{}
for _, pvc := range v.GetClaims(petID) {
pvcs = append(pvcs, pvc)
}
return pvcs
}
// NameIdentityMapper assigns names to pets.
// It also puts the pet in the same namespace as the parent.
type NameIdentityMapper struct {
ps *apps.StatefulSet
}
// SetIdentity sets the pet namespace and name.
func (n *NameIdentityMapper) SetIdentity(id string, pet *v1.Pod) {
pet.Name = fmt.Sprintf("%v-%v", n.ps.Name, id)
pet.Namespace = n.ps.Namespace
return
}
// Identity returns the name identity of the pet.
func (n *NameIdentityMapper) Identity(pet *v1.Pod) string {
return n.String(pet)
}
// String is a string function for the name identity of the pet.
func (n *NameIdentityMapper) String(pet *v1.Pod) string {
return fmt.Sprintf("%v/%v", pet.Namespace, pet.Name)
}
// identityHash computes a hash of the pet by running all the above identity
// mappers.
func identityHash(ps *apps.StatefulSet, pet *v1.Pod) string {
id := ""
for _, idMapper := range newIdentityMappers(ps) {
id += idMapper.Identity(pet)
}
return fmt.Sprintf("%x", md5.Sum([]byte(id)))
}
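// Note (not from the upstream file): the hash is only ever compared for
// equality, as copyPetID below and the tests do:
//
//	if identityHash(ps, realPod) != identityHash(ps, expectedPod) {
//		// the pods differ in name, network or volume identity
//	}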
// copyPetID gives the realPet the same identity as the expectedPet.
// Note that this is *not* a literal copy, but a copy of the fields that
// contribute to the pet's identity. The returned boolean 'needsUpdate' will
// be false if the realPet already has the same identity as the expectedPet.
func copyPetID(realPet, expectedPet *pcb) (pod v1.Pod, needsUpdate bool, err error) {
if realPet.pod == nil || expectedPet.pod == nil {
return pod, false, fmt.Errorf("Need a valid to and from pet for copy")
}
if realPet.parent.UID != expectedPet.parent.UID {
return pod, false, fmt.Errorf("Cannot copy pets with different parents")
}
ps := realPet.parent
if identityHash(ps, realPet.pod) == identityHash(ps, expectedPet.pod) {
return *realPet.pod, false, nil
}
copyPod := *realPet.pod
// This is the easiest way to give an identity to a pod. It won't work
// when we stop using names for id.
for _, idMapper := range newIdentityMappers(ps) {
idMapper.SetIdentity(expectedPet.id, &copyPod)
}
return copyPod, true, nil
}


@@ -0,0 +1,180 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"reflect"
"strings"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
apipod "k8s.io/kubernetes/pkg/api/v1/pod"
)
func TestPetIDName(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
for i := 0; i < replicas; i++ {
petName := fmt.Sprintf("%v-%d", ps.Name, i)
pcb, err := newPCB(fmt.Sprintf("%d", i), ps)
if err != nil {
t.Fatalf("Failed to generate pet %v", err)
}
pod := pcb.pod
if pod.Name != petName || pod.Namespace != ps.Namespace {
t.Errorf("Wrong name identity, expected %v", pcb.pod.Name)
}
}
}
func TestPetIDDNS(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
for i := 0; i < replicas; i++ {
petName := fmt.Sprintf("%v-%d", ps.Name, i)
petSubdomain := ps.Spec.ServiceName
pcb, err := newPCB(fmt.Sprintf("%d", i), ps)
pod := pcb.pod
if err != nil {
t.Fatalf("Failed to generate pet %v", err)
}
if hostname, ok := pod.Annotations[apipod.PodHostnameAnnotation]; !ok || hostname != petName {
t.Errorf("Wrong hostname: %v", hostname)
}
// TODO: Check this against the governing service.
if subdomain, ok := pod.Annotations[apipod.PodSubdomainAnnotation]; !ok || subdomain != petSubdomain {
t.Errorf("Wrong subdomain: %v", subdomain)
}
}
}
func TestPetIDVolume(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
for i := 0; i < replicas; i++ {
pcb, err := newPCB(fmt.Sprintf("%d", i), ps)
if err != nil {
t.Fatalf("Failed to generate pet %v", err)
}
pod := pcb.pod
petName := fmt.Sprintf("%v-%d", ps.Name, i)
claimName := fmt.Sprintf("datadir-%v", petName)
for _, v := range pod.Spec.Volumes {
switch v.Name {
case "datadir":
c := v.VolumeSource.PersistentVolumeClaim
if c == nil || c.ClaimName != claimName {
t.Fatalf("Unexpected claim %v", c)
}
if err := checkPodForMount(pod, "datadir"); err != nil {
t.Errorf("Expected pod mount: %v", err)
}
case "home":
h := v.VolumeSource.HostPath
if h == nil || h.Path != "/tmp/home" {
t.Errorf("Unexpected modification to hostpath, expected /tmp/home got %+v", h)
}
default:
t.Errorf("Unexpected volume %v", v.Name)
}
}
}
// TODO: Check volume mounts.
}
func TestPetIDVolumeClaims(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
for i := 0; i < replicas; i++ {
pcb, err := newPCB(fmt.Sprintf("%v", i), ps)
if err != nil {
t.Fatalf("Failed to generate pet %v", err)
}
pvcs := pcb.pvcs
petName := fmt.Sprintf("%v-%d", ps.Name, i)
claimName := fmt.Sprintf("datadir-%v", petName)
if len(pvcs) != 1 || pvcs[0].Name != claimName {
t.Errorf("Wrong pvc expected %v got %v", claimName, pvcs[0].Name)
}
}
}
func TestPetIDCrossAssignment(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
nameMapper := &NameIdentityMapper{ps}
volumeMapper := &VolumeIdentityMapper{ps}
networkMapper := &NetworkIdentityMapper{ps}
// Check that the name is consistent across identity.
for i := 0; i < replicas; i++ {
pet, _ := newPCB(fmt.Sprintf("%v", i), ps)
p := pet.pod
name := strings.Split(nameMapper.Identity(p), "/")[1]
network := networkMapper.Identity(p)
volume := volumeMapper.Identity(p)
petVolume := strings.Split(volume, ":")[1]
if petVolume != fmt.Sprintf("datadir-%v", name) {
t.Errorf("Unexpected pet volume name %v, expected %v", petVolume, name)
}
if network != fmt.Sprintf("%v.%v.%v", name, ps.Spec.ServiceName, ps.Namespace) {
t.Errorf("Unexpected pet network ID %v, expected %v", network, name)
}
t.Logf("[%v] volume: %+v, network: %+v, name: %+v", i, volume, network, name)
}
}
func TestPetIDReset(t *testing.T) {
replicas := 2
ps := newStatefulSet(replicas)
firstPCB, err := newPCB("1", ps)
secondPCB, err := newPCB("2", ps)
if identityHash(ps, firstPCB.pod) == identityHash(ps, secondPCB.pod) {
t.Fatalf("Failed to generate unique identities:\n%+v\n%+v", firstPCB.pod.Spec, secondPCB.pod.Spec)
}
userAdded := v1.Volume{
Name: "test",
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory},
},
}
firstPCB.pod.Spec.Volumes = append(firstPCB.pod.Spec.Volumes, userAdded)
pod, needsUpdate, err := copyPetID(firstPCB, secondPCB)
if err != nil {
t.Errorf("%v", err)
}
if !needsUpdate {
t.Errorf("expected update since identity of %v was reset", secondPCB.pod.Name)
}
if identityHash(ps, &pod) != identityHash(ps, secondPCB.pod) {
t.Errorf("Failed to copy identity for pod %v -> %v", firstPCB.pod.Name, secondPCB.pod.Name)
}
foundVol := false
for _, v := range pod.Spec.Volumes {
if reflect.DeepEqual(v, userAdded) {
foundVol = true
break
}
}
if !foundVol {
t.Errorf("User added volume was corrupted by reset action.")
}
}


@@ -0,0 +1,163 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"sort"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/controller"
)
// newPCB generates a new PCB using the id string as a unique qualifier
func newPCB(id string, ps *apps.StatefulSet) (*pcb, error) {
petPod, err := controller.GetPodFromTemplate(&ps.Spec.Template, ps, nil)
if err != nil {
return nil, err
}
for _, im := range newIdentityMappers(ps) {
im.SetIdentity(id, petPod)
}
petPVCs := []v1.PersistentVolumeClaim{}
vMapper := &VolumeIdentityMapper{ps}
for _, c := range vMapper.GetClaims(id) {
petPVCs = append(petPVCs, c)
}
// TODO: Replace id field with IdentityHash, since id is more than just an index.
return &pcb{pod: petPod, pvcs: petPVCs, id: id, parent: ps}, nil
}
// petQueue is a custom data structure that resembles a queue of pets.
type petQueue struct {
pets []*pcb
idMapper identityMapper
}
// enqueue enqueues the given pet, evicting any pets with the same id
func (pt *petQueue) enqueue(p *pcb) {
if p == nil {
pt.pets = append(pt.pets, nil)
return
}
// Pop an existing pet from the known list, append the new pet to the end.
petList := []*pcb{}
petID := pt.idMapper.Identity(p.pod)
for i := range pt.pets {
if petID != pt.idMapper.Identity(pt.pets[i].pod) {
petList = append(petList, pt.pets[i])
}
}
pt.pets = petList
p.event = syncPet
pt.pets = append(pt.pets, p)
}
// dequeue returns the last element of the queue
func (pt *petQueue) dequeue() *pcb {
if pt.empty() {
glog.Warningf("Dequeue invoked on an empty queue")
return nil
}
l := len(pt.pets) - 1
pet := pt.pets[l]
pt.pets = pt.pets[:l]
return pet
}
// empty returns true if the pet queue is empty.
func (pt *petQueue) empty() bool {
return len(pt.pets) == 0
}
// NewPetQueue returns a queue for tracking pets
func NewPetQueue(ps *apps.StatefulSet, podList []*v1.Pod) *petQueue {
pt := petQueue{pets: []*pcb{}, idMapper: &NameIdentityMapper{ps}}
// Seed the queue with existing pets. Assume all pets are scheduled for
// deletion; enqueuing a pet will "undelete" it. We always want to delete
// from the higher ids, so sort by creation timestamp.
sort.Sort(PodsByCreationTimestamp(podList))
vMapper := VolumeIdentityMapper{ps}
for i := range podList {
pod := podList[i]
pt.pets = append(pt.pets, &pcb{pod: pod, pvcs: vMapper.GetClaimsForPet(pod), parent: ps, event: deletePet, id: fmt.Sprintf("%v", i)})
}
return &pt
}
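// Illustrative sketch (not from the upstream file), mirroring TestPetQueueScaleDown:
// seeding marks every known pod for deletion, and enqueuing a pcb with the same
// identity flips it back to a sync event:
//
//	q := NewPetQueue(ps, knownPods) // every known pod starts as deletePet
//	pet, _ := newPCB("0", ps)
//	q.enqueue(pet)                  // the pod with ordinal 0 becomes a syncPet
//	for !q.empty() {
//		p := q.dequeue()            // LIFO: the most recently enqueued pet comes out first
//		_ = p
//	}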
// statefulSetIterator implements a simple iterator over pets in the given statefulset.
type statefulSetIterator struct {
// ps is the statefulset for this iterator.
ps *apps.StatefulSet
// queue contains the elements to iterate over.
queue *petQueue
// errs is a list because we always want the iterator to drain.
errs []error
// petCount is the number of pets iterated over.
petCount int32
}
// Next returns true for as long as there are elements in the underlying queue.
func (pi *statefulSetIterator) Next() bool {
var pet *pcb
var err error
if pi.petCount < *(pi.ps.Spec.Replicas) {
pet, err = newPCB(fmt.Sprintf("%d", pi.petCount), pi.ps)
if err != nil {
pi.errs = append(pi.errs, err)
// Don't stop iterating over the set on errors. Caller handles nil.
pet = nil
}
pi.queue.enqueue(pet)
pi.petCount++
}
// Keep the iterator running till we've deleted pets in the queue.
return !pi.queue.empty()
}
// Value dequeues an element from the queue.
func (pi *statefulSetIterator) Value() *pcb {
return pi.queue.dequeue()
}
// NewStatefulSetIterator returns a new iterator. All pods in the given podList
// are used to seed the queue of the iterator.
func NewStatefulSetIterator(ps *apps.StatefulSet, podList []*v1.Pod) *statefulSetIterator {
pi := &statefulSetIterator{
ps: ps,
queue: NewPetQueue(ps, podList),
errs: []error{},
petCount: 0,
}
return pi
}
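// Note (not from the upstream file): Next generates one pcb per desired ordinal
// (0..replicas-1) as a syncPet event and keeps returning true until the queue,
// which still holds deletePet entries for any surplus seeded pods, is drained;
// TestStatefulSetIteratorRelist exercises both the resync and scale-to-zero paths.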
// PodsByCreationTimestamp sorts a list of Pods by creation timestamp, using their names as a tie breaker.
type PodsByCreationTimestamp []*v1.Pod
func (o PodsByCreationTimestamp) Len() int { return len(o) }
func (o PodsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o PodsByCreationTimestamp) Less(i, j int) bool {
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
return o[i].Name < o[j].Name
}
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
}


@@ -0,0 +1,150 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"testing"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/v1"
)
func TestPetQueueCreates(t *testing.T) {
replicas := 3
ps := newStatefulSet(replicas)
q := NewPetQueue(ps, []*v1.Pod{})
for i := 0; i < replicas; i++ {
pet, _ := newPCB(fmt.Sprintf("%v", i), ps)
q.enqueue(pet)
p := q.dequeue()
if p.event != syncPet {
t.Errorf("Failed to retrieve sync event from queue")
}
}
if q.dequeue() != nil {
t.Errorf("Expected no pods")
}
}
func TestPetQueueScaleDown(t *testing.T) {
replicas := 1
ps := newStatefulSet(replicas)
// knownPods are the pods in the system
knownPods := newPodList(ps, 3)
q := NewPetQueue(ps, knownPods)
// The iterator will insert a single replica, the enqueue
// mimics that behavior.
pet, _ := newPCB(fmt.Sprintf("%v", 0), ps)
q.enqueue(pet)
deletes := sets.NewString(fmt.Sprintf("%v-1", ps.Name), fmt.Sprintf("%v-2", ps.Name))
syncs := sets.NewString(fmt.Sprintf("%v-0", ps.Name))
// Confirm that 2 known pods are deleted
for i := 0; i < 3; i++ {
p := q.dequeue()
switch p.event {
case syncPet:
if !syncs.Has(p.pod.Name) {
t.Errorf("Unexpected sync %v expecting %+v", p.pod.Name, syncs)
}
case deletePet:
if !deletes.Has(p.pod.Name) {
t.Errorf("Unexpected deletes %v expecting %+v", p.pod.Name, deletes)
}
}
}
if q.dequeue() != nil {
t.Errorf("Expected no pods")
}
}
func TestPetQueueScaleUp(t *testing.T) {
replicas := 5
ps := newStatefulSet(replicas)
// knownPods are pods in the system
knownPods := newPodList(ps, 2)
q := NewPetQueue(ps, knownPods)
for i := 0; i < 5; i++ {
pet, _ := newPCB(fmt.Sprintf("%v", i), ps)
q.enqueue(pet)
}
for i := 4; i >= 0; i-- {
pet := q.dequeue()
expectedName := fmt.Sprintf("%v-%d", ps.Name, i)
if pet.event != syncPet || pet.pod.Name != expectedName {
t.Errorf("Unexpected pod %+v, expected %v", pet.pod.Name, expectedName)
}
}
}
func TestStatefulSetIteratorRelist(t *testing.T) {
replicas := 5
ps := newStatefulSet(replicas)
// knownPods are pods in the system
knownPods := newPodList(ps, 5)
for i := range knownPods {
knownPods[i].Spec.NodeName = fmt.Sprintf("foo-node-%v", i)
knownPods[i].Status.Phase = v1.PodRunning
}
pi := NewStatefulSetIterator(ps, knownPods)
// A simple resync should not change identity of pods in the system
i := 0
for pi.Next() {
p := pi.Value()
if identityHash(ps, p.pod) != identityHash(ps, knownPods[i]) {
t.Errorf("Got unexpected identity hash from iterator.")
}
if p.event != syncPet {
t.Errorf("Got unexpected sync event for %v: %v", p.pod.Name, p.event)
}
i++
}
if i != 5 {
t.Errorf("Unexpected iterations %v, this probably means too many/few pods", i)
}
// Scale to 0 should delete all pods in system
*(ps.Spec.Replicas) = 0
pi = NewStatefulSetIterator(ps, knownPods)
i = 0
for pi.Next() {
p := pi.Value()
if p.event != deletePet {
t.Errorf("Got unexpected sync event for %v: %v", p.pod.Name, p.event)
}
i++
}
if i != 5 {
t.Errorf("Unexpected iterations %v, this probably means too many/few pods", i)
}
// Relist with 0 replicas should no-op
pi = NewStatefulSetIterator(ps, []*v1.Pod{})
if pi.Next() != false {
t.Errorf("Unexpected iteration without any replicas or pods in system")
}
}


@@ -0,0 +1,371 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"reflect"
"sort"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
v1core "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/core/v1"
"k8s.io/kubernetes/pkg/client/legacylisters"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/util/workqueue"
"github.com/golang/glog"
)
const (
// Time to sleep before polling to see if the pod cache has synced.
PodStoreSyncedPollPeriod = 100 * time.Millisecond
// number of retries for a status update.
statusUpdateRetries = 2
// period to relist statefulsets and verify pets
statefulSetResyncPeriod = 30 * time.Second
)
// StatefulSetController controls statefulsets.
type StatefulSetController struct {
kubeClient clientset.Interface
// newSyncer returns an interface capable of syncing a single pet.
// Abstracted out for testing.
newSyncer func(*pcb) *petSyncer
// podStore is a cache of watched pods.
podStore listers.StoreToPodLister
// podStoreSynced returns true if the pod store has synced at least once.
podStoreSynced func() bool
// Watches changes to all pods.
podController cache.Controller
// A store of StatefulSets, populated by the psController.
psStore listers.StoreToStatefulSetLister
// Watches changes to all StatefulSets.
psController cache.Controller
// A store of the 1 unhealthy pet blocking progress for a given ps
blockingPetStore *unhealthyPetTracker
// Controllers that need to be synced.
queue workqueue.RateLimitingInterface
// syncHandler handles sync events for statefulsets.
// Abstracted as a func to allow injection for testing.
syncHandler func(psKey string) error
}
// NewStatefulSetController creates a new statefulset controller.
func NewStatefulSetController(podInformer cache.SharedIndexInformer, kubeClient clientset.Interface, resyncPeriod time.Duration) *StatefulSetController {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(glog.Infof)
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.Core().Events("")})
recorder := eventBroadcaster.NewRecorder(v1.EventSource{Component: "statefulset"})
pc := &apiServerPetClient{kubeClient, recorder, &defaultPetHealthChecker{}}
psc := &StatefulSetController{
kubeClient: kubeClient,
blockingPetStore: newUnHealthyPetTracker(pc),
newSyncer: func(blockingPet *pcb) *petSyncer {
return &petSyncer{pc, blockingPet}
},
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "statefulset"),
}
podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
// lookup the statefulset and enqueue
AddFunc: psc.addPod,
// lookup current and old statefulset if labels changed
UpdateFunc: psc.updatePod,
// lookup statefulset accounting for deletion tombstones
DeleteFunc: psc.deletePod,
})
psc.podStore.Indexer = podInformer.GetIndexer()
psc.podController = podInformer.GetController()
psc.psStore.Store, psc.psController = cache.NewInformer(
&cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
return psc.kubeClient.Apps().StatefulSets(metav1.NamespaceAll).List(options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
return psc.kubeClient.Apps().StatefulSets(metav1.NamespaceAll).Watch(options)
},
},
&apps.StatefulSet{},
statefulSetResyncPeriod,
cache.ResourceEventHandlerFuncs{
AddFunc: psc.enqueueStatefulSet,
UpdateFunc: func(old, cur interface{}) {
oldPS := old.(*apps.StatefulSet)
curPS := cur.(*apps.StatefulSet)
if oldPS.Status.Replicas != curPS.Status.Replicas {
glog.V(4).Infof("Observed updated replica count for StatefulSet: %v, %d->%d", curPS.Name, oldPS.Status.Replicas, curPS.Status.Replicas)
}
psc.enqueueStatefulSet(cur)
},
DeleteFunc: psc.enqueueStatefulSet,
},
)
// TODO: Watch volumes
psc.podStoreSynced = psc.podController.HasSynced
psc.syncHandler = psc.Sync
return psc
}
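// Illustrative wiring sketch (not from the upstream file); the pod informer,
// client and resync period are assumed to be constructed by the caller,
// presumably the controller manager elsewhere in the tree:
//
//	psc := NewStatefulSetController(podInformer, kubeClient, resyncPeriod)
//	stopCh := make(chan struct{})
//	go psc.Run(1, stopCh) // one worker; Run blocks until stopCh is closed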
// Run runs the statefulset controller.
func (psc *StatefulSetController) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
glog.Infof("Starting statefulset controller")
go psc.podController.Run(stopCh)
go psc.psController.Run(stopCh)
for i := 0; i < workers; i++ {
go wait.Until(psc.worker, time.Second, stopCh)
}
<-stopCh
glog.Infof("Shutting down statefulset controller")
psc.queue.ShutDown()
}
// addPod adds the statefulset for the pod to the sync queue
func (psc *StatefulSetController) addPod(obj interface{}) {
pod := obj.(*v1.Pod)
glog.V(4).Infof("Pod %s created, labels: %+v", pod.Name, pod.Labels)
ps := psc.getStatefulSetForPod(pod)
if ps == nil {
return
}
psc.enqueueStatefulSet(ps)
}
// updatePod adds the statefulset for the current and old pods to the sync queue.
// If the labels of the pod didn't change, this method enqueues a single statefulset.
func (psc *StatefulSetController) updatePod(old, cur interface{}) {
curPod := cur.(*v1.Pod)
oldPod := old.(*v1.Pod)
if curPod.ResourceVersion == oldPod.ResourceVersion {
// Periodic resync will send update events for all known pods.
// Two different versions of the same pod will always have different RVs.
return
}
ps := psc.getStatefulSetForPod(curPod)
if ps == nil {
return
}
psc.enqueueStatefulSet(ps)
if !reflect.DeepEqual(curPod.Labels, oldPod.Labels) {
if oldPS := psc.getStatefulSetForPod(oldPod); oldPS != nil {
psc.enqueueStatefulSet(oldPS)
}
}
}
// deletePod enqueues the statefulset for the pod accounting for deletion tombstones.
func (psc *StatefulSetController) deletePod(obj interface{}) {
pod, ok := obj.(*v1.Pod)
// When a delete is dropped, the relist will notice a pod in the store not
// in the list, leading to the insertion of a tombstone object which contains
// the deleted key/value. Note that this value might be stale. If the pod
// changed labels the new StatefulSet will not be woken up till the periodic resync.
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("couldn't get object from tombstone %+v", obj)
return
}
pod, ok = tombstone.Obj.(*v1.Pod)
if !ok {
glog.Errorf("tombstone contained object that is not a pod %+v", obj)
return
}
}
glog.V(4).Infof("Pod %s/%s deleted through %v.", pod.Namespace, pod.Name, utilruntime.GetCaller())
if ps := psc.getStatefulSetForPod(pod); ps != nil {
psc.enqueueStatefulSet(ps)
}
}
// getPodsForStatefulSet returns the pods that match the selector of the given statefulset.
func (psc *StatefulSetController) getPodsForStatefulSet(ps *apps.StatefulSet) ([]*v1.Pod, error) {
// TODO: Do we want the statefulset to fight with RCs? check parent statefulset annotation, or name prefix?
sel, err := metav1.LabelSelectorAsSelector(ps.Spec.Selector)
if err != nil {
return []*v1.Pod{}, err
}
pods, err := psc.podStore.Pods(ps.Namespace).List(sel)
if err != nil {
return []*v1.Pod{}, err
}
// TODO: Do we need to copy?
result := make([]*v1.Pod, 0, len(pods))
for i := range pods {
result = append(result, &(*pods[i]))
}
return result, nil
}
// getStatefulSetForPod returns the StatefulSet managing the given pod.
func (psc *StatefulSetController) getStatefulSetForPod(pod *v1.Pod) *apps.StatefulSet {
ps, err := psc.psStore.GetPodStatefulSets(pod)
if err != nil {
glog.V(4).Infof("No StatefulSets found for pod %v, StatefulSet controller will avoid syncing", pod.Name)
return nil
}
// Resolve an overlapping statefulset tie by creation timestamp.
// Let's hope users don't create overlapping statefulsets.
if len(ps) > 1 {
glog.Errorf("user error! more than one StatefulSet is selecting pods with labels: %+v", pod.Labels)
sort.Sort(overlappingStatefulSets(ps))
}
return &ps[0]
}
// enqueueStatefulSet enqueues the given statefulset in the work queue.
func (psc *StatefulSetController) enqueueStatefulSet(obj interface{}) {
key, err := controller.KeyFunc(obj)
if err != nil {
glog.Errorf("Couldn't get key for object %+v: %v", obj, err)
return
}
psc.queue.Add(key)
}
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (psc *StatefulSetController) worker() {
for {
func() {
key, quit := psc.queue.Get()
if quit {
return
}
defer psc.queue.Done(key)
if err := psc.syncHandler(key.(string)); err != nil {
glog.Errorf("Error syncing StatefulSet %v, requeuing: %v", key.(string), err)
psc.queue.AddRateLimited(key)
} else {
psc.queue.Forget(key)
}
}()
}
}
// Sync syncs the given statefulset.
func (psc *StatefulSetController) Sync(key string) error {
startTime := time.Now()
defer func() {
glog.V(4).Infof("Finished syncing statefulset %q (%v)", key, time.Now().Sub(startTime))
}()
if !psc.podStoreSynced() {
// Sleep so we give the pod reflector goroutine a chance to run.
time.Sleep(PodStoreSyncedPollPeriod)
return fmt.Errorf("waiting for pods controller to sync")
}
obj, exists, err := psc.psStore.Store.GetByKey(key)
if !exists {
if err = psc.blockingPetStore.store.Delete(key); err != nil {
return err
}
glog.Infof("StatefulSet has been deleted %v", key)
return nil
}
if err != nil {
glog.Errorf("Unable to retrieve StatefulSet %v from store: %v", key, err)
return err
}
ps := *obj.(*apps.StatefulSet)
petList, err := psc.getPodsForStatefulSet(&ps)
if err != nil {
return err
}
numPets, syncErr := psc.syncStatefulSet(&ps, petList)
if updateErr := updatePetCount(psc.kubeClient.Apps(), ps, numPets); updateErr != nil {
glog.Infof("Failed to update replica count for statefulset %v/%v; requeuing; error: %v", ps.Namespace, ps.Name, updateErr)
return errors.NewAggregate([]error{syncErr, updateErr})
}
return syncErr
}
// syncStatefulSet syncs a tuple of (statefulset, pets).
func (psc *StatefulSetController) syncStatefulSet(ps *apps.StatefulSet, pets []*v1.Pod) (int, error) {
glog.V(2).Infof("Syncing StatefulSet %v/%v with %d pods", ps.Namespace, ps.Name, len(pets))
it := NewStatefulSetIterator(ps, pets)
blockingPet, err := psc.blockingPetStore.Get(ps, pets)
if err != nil {
return 0, err
}
if blockingPet != nil {
glog.Infof("StatefulSet %v blocked from scaling on pod %v", ps.Name, blockingPet.pod.Name)
}
petManager := psc.newSyncer(blockingPet)
numPets := 0
for it.Next() {
pet := it.Value()
if pet == nil {
continue
}
switch pet.event {
case syncPet:
err = petManager.Sync(pet)
if err == nil {
numPets++
}
case deletePet:
err = petManager.Delete(pet)
}
switch err.(type) {
case errUnhealthyPet:
// We are not passing this error up, but we don't increment numPets if we encounter it,
// since numPets directly translates to statefulset.status.replicas
continue
case nil:
continue
default:
it.errs = append(it.errs, err)
}
}
if err := psc.blockingPetStore.Add(petManager.blockingPet); err != nil {
it.errs = append(it.errs, err)
}
// TODO: GC pvcs. We can't delete them per pet because of grace period, and
// in fact we *don't want to* till statefulset is stable to guarantee that bugs
// in the controller don't corrupt user data.
return numPets, errors.NewAggregate(it.errs)
}


@@ -0,0 +1,332 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"math/rand"
"reflect"
"testing"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/pkg/api/v1"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/cache"
fakeinternal "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1/fake"
"k8s.io/kubernetes/pkg/client/legacylisters"
"k8s.io/kubernetes/pkg/controller"
)
func newFakeStatefulSetController() (*StatefulSetController, *fakePetClient) {
fpc := newFakePetClient()
return &StatefulSetController{
kubeClient: nil,
blockingPetStore: newUnHealthyPetTracker(fpc),
podStoreSynced: func() bool { return true },
psStore: listers.StoreToStatefulSetLister{Store: cache.NewStore(controller.KeyFunc)},
podStore: listers.StoreToPodLister{Indexer: cache.NewIndexer(controller.KeyFunc, cache.Indexers{})},
newSyncer: func(blockingPet *pcb) *petSyncer {
return &petSyncer{fpc, blockingPet}
},
}, fpc
}
func checkPets(ps *apps.StatefulSet, creates, deletes int, fc *fakePetClient, t *testing.T) {
if fc.petsCreated != creates || fc.petsDeleted != deletes {
t.Errorf("Found (creates: %d, deletes: %d), expected (creates: %d, deletes: %d)", fc.petsCreated, fc.petsDeleted, creates, deletes)
}
gotClaims := map[string]v1.PersistentVolumeClaim{}
for _, pvc := range fc.claims {
gotClaims[pvc.Name] = pvc
}
for i := range fc.pets {
expectedPet, _ := newPCB(fmt.Sprintf("%v", i), ps)
if identityHash(ps, fc.pets[i].pod) != identityHash(ps, expectedPet.pod) {
t.Errorf("Unexpected pod at index %d", i)
}
for _, pvc := range expectedPet.pvcs {
gotPVC, ok := gotClaims[pvc.Name]
if !ok {
t.Errorf("PVC %v not created for pod %v", pvc.Name, expectedPet.pod.Name)
}
if !reflect.DeepEqual(gotPVC.Spec, pvc.Spec) {
t.Errorf("got PVC %v differs from created pvc", pvc.Name)
}
}
}
}
func scaleStatefulSet(t *testing.T, ps *apps.StatefulSet, psc *StatefulSetController, fc *fakePetClient, scale int) error {
errs := []error{}
for i := 0; i < scale; i++ {
pl := fc.getPodList()
if len(pl) != i {
t.Errorf("Unexpected number of pods, expected %d found %d", i, len(pl))
}
if _, syncErr := psc.syncStatefulSet(ps, pl); syncErr != nil {
errs = append(errs, syncErr)
}
fc.setHealthy(i)
checkPets(ps, i+1, 0, fc, t)
}
return errors.NewAggregate(errs)
}
func saturateStatefulSet(t *testing.T, ps *apps.StatefulSet, psc *StatefulSetController, fc *fakePetClient) {
err := scaleStatefulSet(t, ps, psc, fc, int(*(ps.Spec.Replicas)))
if err != nil {
t.Errorf("Error scaleStatefulSet: %v", err)
}
}
func TestStatefulSetControllerCreates(t *testing.T) {
psc, fc := newFakeStatefulSetController()
replicas := 3
ps := newStatefulSet(replicas)
saturateStatefulSet(t, ps, psc, fc)
podList := fc.getPodList()
// Deleted pet gets recreated
fc.pets = fc.pets[:replicas-1]
if _, err := psc.syncStatefulSet(ps, podList); err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
checkPets(ps, replicas+1, 0, fc, t)
}
func TestStatefulSetControllerDeletes(t *testing.T) {
psc, fc := newFakeStatefulSetController()
replicas := 4
ps := newStatefulSet(replicas)
saturateStatefulSet(t, ps, psc, fc)
// Drain
errs := []error{}
*(ps.Spec.Replicas) = 0
knownPods := fc.getPodList()
for i := replicas - 1; i >= 0; i-- {
if len(fc.pets) != i+1 {
t.Errorf("Unexpected number of pods, expected %d found %d", i+1, len(fc.pets))
}
if _, syncErr := psc.syncStatefulSet(ps, knownPods); syncErr != nil {
errs = append(errs, syncErr)
}
}
if len(errs) != 0 {
t.Errorf("Error syncing StatefulSet: %v", errors.NewAggregate(errs))
}
checkPets(ps, replicas, replicas, fc, t)
}
func TestStatefulSetControllerRespectsTermination(t *testing.T) {
psc, fc := newFakeStatefulSetController()
replicas := 4
ps := newStatefulSet(replicas)
saturateStatefulSet(t, ps, psc, fc)
fc.setDeletionTimestamp(replicas - 1)
*(ps.Spec.Replicas) = 2
_, err := psc.syncStatefulSet(ps, fc.getPodList())
if err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
// Finding a pod with the deletion timestamp will pause all deletions.
knownPods := fc.getPodList()
if len(knownPods) != 4 {
t.Errorf("Pods deleted prematurely before deletion timestamp expired, len %d", len(knownPods))
}
fc.pets = fc.pets[:replicas-1]
_, err = psc.syncStatefulSet(ps, fc.getPodList())
if err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
checkPets(ps, replicas, 1, fc, t)
}
func TestStatefulSetControllerRespectsOrder(t *testing.T) {
psc, fc := newFakeStatefulSetController()
replicas := 4
ps := newStatefulSet(replicas)
saturateStatefulSet(t, ps, psc, fc)
errs := []error{}
*(ps.Spec.Replicas) = 0
// Shuffle known list and check that pets are deleted in reverse
knownPods := fc.getPodList()
for i := range knownPods {
j := rand.Intn(i + 1)
knownPods[i], knownPods[j] = knownPods[j], knownPods[i]
}
for i := 0; i < replicas; i++ {
if len(fc.pets) != replicas-i {
t.Errorf("Unexpected number of pods, expected %d found %d", replicas-i, len(fc.pets))
}
if _, syncErr := psc.syncStatefulSet(ps, knownPods); syncErr != nil {
errs = append(errs, syncErr)
}
checkPets(ps, replicas, i+1, fc, t)
}
if len(errs) != 0 {
t.Errorf("Error syncing StatefulSet: %v", errors.NewAggregate(errs))
}
}
func TestStatefulSetControllerBlocksScaling(t *testing.T) {
psc, fc := newFakeStatefulSetController()
replicas := 5
ps := newStatefulSet(replicas)
scaleStatefulSet(t, ps, psc, fc, 3)
// Create 4th pet, then before flipping it to healthy, kill the first pet.
// There should only be 1 not-healthy pet at a time.
pl := fc.getPodList()
if _, err := psc.syncStatefulSet(ps, pl); err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
deletedPod := pl[0]
fc.deletePetAtIndex(0)
pl = fc.getPodList()
if _, err := psc.syncStatefulSet(ps, pl); err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
newPodList := fc.getPodList()
for _, p := range newPodList {
if p.Name == deletedPod.Name {
t.Errorf("Deleted pod was created while existing pod was unhealthy")
}
}
fc.setHealthy(len(newPodList) - 1)
if _, err := psc.syncStatefulSet(ps, pl); err != nil {
t.Errorf("Error syncing StatefulSet: %v", err)
}
found := false
for _, p := range fc.getPodList() {
if p.Name == deletedPod.Name {
found = true
break
}
}
if !found {
t.Errorf("Deleted pod was not created after existing pods became healthy")
}
}
func TestStatefulSetBlockingPetIsCleared(t *testing.T) {
psc, fc := newFakeStatefulSetController()
ps := newStatefulSet(3)
scaleStatefulSet(t, ps, psc, fc, 1)
if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking != nil {
t.Errorf("Unexpected blocking pod %v, err %v", blocking, err)
}
// 1 not yet healthy pet
psc.syncStatefulSet(ps, fc.getPodList())
if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking == nil {
t.Errorf("Expected blocking pod %v, err %v", blocking, err)
}
// Deleting the statefulset should clear the blocking pet
if err := psc.psStore.Store.Delete(ps); err != nil {
t.Fatalf("Unable to delete pod %v from statefulset controller store.", ps.Name)
}
if err := psc.Sync(fmt.Sprintf("%v/%v", ps.Namespace, ps.Name)); err != nil {
t.Errorf("Error during sync of deleted statefulset %v", err)
}
fc.pets = []*pcb{}
fc.petsCreated = 0
if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking != nil {
t.Errorf("Unexpected blocking pod %v, err %v", blocking, err)
}
saturateStatefulSet(t, ps, psc, fc)
// Make sure we don't leak the final blocking pet in the store
psc.syncStatefulSet(ps, fc.getPodList())
if p, exists, err := psc.blockingPetStore.store.GetByKey(fmt.Sprintf("%v/%v", ps.Namespace, ps.Name)); err != nil || exists {
t.Errorf("Unexpected blocking pod, err %v: %+v", err, p)
}
}
func TestSyncStatefulSetBlockedPet(t *testing.T) {
psc, fc := newFakeStatefulSetController()
ps := newStatefulSet(3)
i, _ := psc.syncStatefulSet(ps, fc.getPodList())
if i != len(fc.getPodList()) {
t.Errorf("syncStatefulSet should return the actual number of pods")
}
}
type fakeClient struct {
fakeinternal.Clientset
statefulsetClient *fakeStatefulSetClient
}
func (c *fakeClient) Apps() v1beta1.AppsV1beta1Interface {
return &fakeApps{c, &fake.FakeAppsV1beta1{}}
}
type fakeApps struct {
*fakeClient
*fake.FakeAppsV1beta1
}
func (c *fakeApps) StatefulSets(namespace string) v1beta1.StatefulSetInterface {
c.statefulsetClient.Namespace = namespace
return c.statefulsetClient
}
type fakeStatefulSetClient struct {
*fake.FakeStatefulSets
Namespace string
replicas int32
}
func (f *fakeStatefulSetClient) UpdateStatus(statefulset *apps.StatefulSet) (*apps.StatefulSet, error) {
f.replicas = statefulset.Status.Replicas
return statefulset, nil
}
func TestStatefulSetReplicaCount(t *testing.T) {
fpsc := &fakeStatefulSetClient{}
psc, _ := newFakeStatefulSetController()
psc.kubeClient = &fakeClient{
statefulsetClient: fpsc,
}
ps := newStatefulSet(3)
psKey := fmt.Sprintf("%v/%v", ps.Namespace, ps.Name)
psc.psStore.Store.Add(ps)
if err := psc.Sync(psKey); err != nil {
t.Errorf("Error during sync of deleted statefulset %v", err)
}
if fpsc.replicas != 1 {
t.Errorf("Replicas count sent as status update for StatefulSet should be 1, is %d instead", fpsc.replicas)
}
}


@@ -0,0 +1,158 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"sync"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/cache"
appsclientset "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1"
"k8s.io/kubernetes/pkg/controller"
"github.com/golang/glog"
)
// overlappingStatefulSets sorts a list of StatefulSets by creation timestamp, using their names as a tie breaker.
// Generally used to tie break between StatefulSets that have overlapping selectors.
type overlappingStatefulSets []apps.StatefulSet
func (o overlappingStatefulSets) Len() int { return len(o) }
func (o overlappingStatefulSets) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o overlappingStatefulSets) Less(i, j int) bool {
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
return o[i].Name < o[j].Name
}
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
}
// updatePetCount attempts to update the Status.Replicas of the given StatefulSet, with a single GET/PUT retry.
func updatePetCount(psClient appsclientset.StatefulSetsGetter, ps apps.StatefulSet, numPets int) (updateErr error) {
if ps.Status.Replicas == int32(numPets) || psClient == nil {
return nil
}
var getErr error
for i, ps := 0, &ps; ; i++ {
glog.V(4).Infof(fmt.Sprintf("Updating replica count for StatefulSet: %s/%s, ", ps.Namespace, ps.Name) +
fmt.Sprintf("replicas %d->%d (need %d), ", ps.Status.Replicas, numPets, *(ps.Spec.Replicas)))
ps.Status = apps.StatefulSetStatus{Replicas: int32(numPets)}
_, updateErr = psClient.StatefulSets(ps.Namespace).UpdateStatus(ps)
if updateErr == nil || i >= statusUpdateRetries {
return updateErr
}
if ps, getErr = psClient.StatefulSets(ps.Namespace).Get(ps.Name, metav1.GetOptions{}); getErr != nil {
return getErr
}
}
}
// unhealthyPetTracker tracks unhealthy pets for statefulsets.
type unhealthyPetTracker struct {
pc petClient
store cache.Store
storeLock sync.Mutex
}
// Get returns a previously recorded blocking pet for the given statefulset.
func (u *unhealthyPetTracker) Get(ps *apps.StatefulSet, knownPets []*v1.Pod) (*pcb, error) {
u.storeLock.Lock()
defer u.storeLock.Unlock()
// We "Get" by key but "Add" by object because the store interface doesn't
// allow us to Get/Add a related obj (eg statefulset: blocking pet).
key, err := controller.KeyFunc(ps)
if err != nil {
return nil, err
}
obj, exists, err := u.store.GetByKey(key)
if err != nil {
return nil, err
}
hc := defaultPetHealthChecker{}
// There's no unhealthy pet blocking a scale event, but this might be
// a controller manager restart. If it is, knownPets can be trusted.
if !exists {
for _, p := range knownPets {
if hc.isHealthy(p) && !hc.isDying(p) {
glog.V(4).Infof("Ignoring healthy pod %v for StatefulSet %v", p.Name, ps.Name)
continue
}
glog.V(4).Infof("No recorded blocking pod, but found unhealthy pod %v for StatefulSet %v", p.Name, ps.Name)
return &pcb{pod: p, parent: ps}, nil
}
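// All known pets are healthy, so nothing blocks the scale event.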
return nil, nil
}
// This is a pet that's blocking further creates/deletes of a statefulset. If it
// disappears, it's no longer blocking. If it exists, it continues to block
// until it turns healthy or disappears.
bp := obj.(*pcb)
blockingPet, exists, err := u.pc.Get(bp)
if err != nil {
return nil, err
}
if !exists {
glog.V(4).Infof("Clearing blocking pod %v for StatefulSet %v because it's been deleted", bp.pod.Name, ps.Name)
return nil, nil
}
blockingPetPod := blockingPet.pod
if hc.isHealthy(blockingPetPod) && !hc.isDying(blockingPetPod) {
glog.V(4).Infof("Clearing blocking pod %v for StatefulSet %v because it's healthy", bp.pod.Name, ps.Name)
u.store.Delete(blockingPet)
blockingPet = nil
}
return blockingPet, nil
}
// Add records the given pet as a blocking pet.
func (u *unhealthyPetTracker) Add(blockingPet *pcb) error {
u.storeLock.Lock()
defer u.storeLock.Unlock()
if blockingPet == nil {
return nil
}
glog.V(4).Infof("Adding blocking pod %v for StatefulSet %v", blockingPet.pod.Name, blockingPet.parent.Name)
return u.store.Add(blockingPet)
}
// newUnHealthyPetTracker tracks unhealthy pets that block progress of statefulsets.
func newUnHealthyPetTracker(pc petClient) *unhealthyPetTracker {
return &unhealthyPetTracker{pc: pc, store: cache.NewStore(pcbKeyFunc)}
}
// pcbKeyFunc computes the key for a given pcb.
// If it's given a key, it simply returns it.
func pcbKeyFunc(obj interface{}) (string, error) {
if key, ok := obj.(string); ok {
return key, nil
}
p, ok := obj.(*pcb)
if !ok {
return "", fmt.Errorf("not a valid pod control block %#v", p)
}
if p.parent == nil {
return "", fmt.Errorf("cannot compute pod control block key without parent pointer %#v", p)
}
return controller.KeyFunc(p.parent)
}

View file

@ -0,0 +1,327 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"strconv"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/kubernetes/pkg/api/v1"
apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/client/record"
"github.com/golang/glog"
)
// petLifeCycleEvent is used to communicate high level actions the controller
// needs to take on a given pet. It's recorded in the pcb. The recognized values
// are listed below.
type petLifeCycleEvent string
const (
syncPet petLifeCycleEvent = "sync"
deletePet petLifeCycleEvent = "delete"
// updateRetries is the number of Get/Update cycles we perform when an
// update fails.
updateRetries = 3
// StatefulSetInitAnnotation is an annotation which when set, indicates that the
// pet has finished initializing itself.
// TODO: Replace this with init container status.
StatefulSetInitAnnotation = "pod.alpha.kubernetes.io/initialized"
)
// pcb is the control block used to transmit all updates about a single pet.
// It serves as the manifest for a single pet. Users must populate the pod
// and parent fields to pass it around safely.
type pcb struct {
// pod is the desired pet pod.
pod *v1.Pod
// pvcs is a list of desired persistent volume claims for the pet pod.
pvcs []v1.PersistentVolumeClaim
// event is the lifecycle event associated with this update.
event petLifeCycleEvent
// id is the identity index of this pet.
id string
// parent is a pointer to the parent statefulset.
parent *apps.StatefulSet
}
// pvcClient is a client for managing persistent volume claims.
type pvcClient interface {
// DeletePVCs deletes the pvcs in the given pcb.
DeletePVCs(*pcb) error
// SyncPVCs creates/updates pvcs in the given pcb.
SyncPVCs(*pcb) error
}
// petSyncer syncs a single pet.
type petSyncer struct {
petClient
// blockingPet is an unhealthy pet, either from this iteration or a previous
// one, that is not yet Running or is being deleted and therefore blocks
// further creates/deletes.
blockingPet *pcb
}
// errUnhealthyPet is returned when we either know for sure a pet is unhealthy,
// or don't know its state but assume it is unhealthy. It's used as a signal to the caller for further operations like updating status.replicas.
// This is not a fatal error.
type errUnhealthyPet string
func (e errUnhealthyPet) Error() string {
return string(e)
}
// Sync syncs the given pet.
func (p *petSyncer) Sync(pet *pcb) error {
if pet == nil {
return nil
}
realPet, exists, err := p.Get(pet)
if err != nil {
return err
}
// There is no constraint other than quota on the number of PVCs created.
// This is done per pet so we get a working cluster ASAP, even if the user
// runs out of quota.
if err := p.SyncPVCs(pet); err != nil {
return err
}
// If the pet failed, we need to remove the old one to keep naming consistent.
if exists && realPet.pod.Status.Phase == v1.PodFailed {
glog.V(2).Infof("Deleting evicted pod %v/%v", realPet.pod.Namespace, realPet.pod.Name)
if err := p.petClient.Delete(realPet); err != nil {
return err
}
} else if exists {
if !p.isHealthy(realPet.pod) {
glog.V(4).Infof("StatefulSet %v waiting on unhealthy pod %v", pet.parent.Name, realPet.pod.Name)
}
return p.Update(realPet, pet)
}
if p.blockingPet != nil {
message := errUnhealthyPet(fmt.Sprintf("Create of %v in StatefulSet %v blocked by unhealthy pod %v", pet.pod.Name, pet.parent.Name, p.blockingPet.pod.Name))
glog.V(4).Infof(message.Error())
return message
}
// This is counted as a create, even if it fails. We can't skip indices
// because some pets might allocate a special role to earlier indices.
// The returned error will force a requeue.
// TODO: What's the desired behavior if pet-0 is deleted while pet-1 is
// not yet healthy? Currently pet-0 will wait until pet-1 is healthy;
// this feels safer, but might lead to deadlock.
p.blockingPet = pet
if err := p.Create(pet); err != nil {
return err
}
return nil
}
// Delete deletes the given pet, if no other pet in the statefulset is blocking a
// scale event.
func (p *petSyncer) Delete(pet *pcb) error {
if pet == nil {
return nil
}
realPet, exists, err := p.Get(pet)
if err != nil {
return err
}
if !exists {
return nil
}
if p.blockingPet != nil {
glog.V(4).Infof("Delete of %v in StatefulSet %v blocked by unhealthy pod %v", realPet.pod.Name, pet.parent.Name, p.blockingPet.pod.Name)
return nil
}
// This is counted as a delete, even if it fails.
// The returned error will force a requeue.
p.blockingPet = realPet
if !p.isDying(realPet.pod) {
glog.V(2).Infof("StatefulSet %v deleting pod %v/%v", pet.parent.Name, pet.pod.Namespace, pet.pod.Name)
return p.petClient.Delete(pet)
}
glog.V(4).Infof("StatefulSet %v waiting on pod %v to die in %v", pet.parent.Name, realPet.pod.Name, realPet.pod.DeletionTimestamp)
return nil
}
// petClient is a client for managing pets.
type petClient interface {
pvcClient
petHealthChecker
Delete(*pcb) error
Get(*pcb) (*pcb, bool, error)
Create(*pcb) error
Update(*pcb, *pcb) error
}
// apiServerPetClient is a statefulset aware Kubernetes client.
type apiServerPetClient struct {
c clientset.Interface
recorder record.EventRecorder
petHealthChecker
}
// Get gets the pet in the pcb from the apiserver.
func (p *apiServerPetClient) Get(pet *pcb) (*pcb, bool, error) {
ns := pet.parent.Namespace
pod, err := p.c.Core().Pods(ns).Get(pet.pod.Name, metav1.GetOptions{})
if errors.IsNotFound(err) {
return nil, false, nil
}
if err != nil {
return nil, false, err
}
realPet := *pet
realPet.pod = pod
return &realPet, true, nil
}
// Delete deletes the pet in the pcb from the apiserver.
func (p *apiServerPetClient) Delete(pet *pcb) error {
err := p.c.Core().Pods(pet.parent.Namespace).Delete(pet.pod.Name, nil)
if errors.IsNotFound(err) {
err = nil
}
p.event(pet.parent, "Delete", fmt.Sprintf("pod: %v", pet.pod.Name), err)
return err
}
// Create creates the pet in the pcb.
func (p *apiServerPetClient) Create(pet *pcb) error {
_, err := p.c.Core().Pods(pet.parent.Namespace).Create(pet.pod)
p.event(pet.parent, "Create", fmt.Sprintf("pod: %v", pet.pod.Name), err)
return err
}
// Update updates the pet in the 'pet' pcb to match the pet in the 'expectedPet' pcb.
// If the pod object of the pet being updated has changed on the server side,
// we fetch the current value and reset the pet identity before retrying.
func (p *apiServerPetClient) Update(pet *pcb, expectedPet *pcb) (updateErr error) {
pc := p.c.Core().Pods(pet.parent.Namespace)
for i := 0; ; i++ {
updatePod, needsUpdate, err := copyPetID(pet, expectedPet)
if err != nil || !needsUpdate {
return err
}
glog.V(4).Infof("Resetting pod %v/%v to match StatefulSet %v spec", pet.pod.Namespace, pet.pod.Name, pet.parent.Name)
_, updateErr = pc.Update(&updatePod)
if updateErr == nil || i >= updateRetries {
return updateErr
}
getPod, getErr := pc.Get(updatePod.Name, metav1.GetOptions{})
if getErr != nil {
return getErr
}
pet.pod = getPod
}
}
// DeletePVCs should delete PVCs, when implemented.
func (p *apiServerPetClient) DeletePVCs(pet *pcb) error {
// TODO: Implement this when we delete pvcs.
return nil
}
func (p *apiServerPetClient) getPVC(pvcName, pvcNamespace string) (*v1.PersistentVolumeClaim, error) {
pvc, err := p.c.Core().PersistentVolumeClaims(pvcNamespace).Get(pvcName, metav1.GetOptions{})
return pvc, err
}
func (p *apiServerPetClient) createPVC(pvc *v1.PersistentVolumeClaim) error {
_, err := p.c.Core().PersistentVolumeClaims(pvc.Namespace).Create(pvc)
return err
}
// SyncPVCs syncs pvcs in the given pcb.
func (p *apiServerPetClient) SyncPVCs(pet *pcb) error {
errmsg := ""
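// Errors are accumulated into errmsg so that one failing claim does not
// prevent the remaining claims from being created.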
// Create new claims.
for i, pvc := range pet.pvcs {
_, err := p.getPVC(pvc.Name, pet.parent.Namespace)
if err != nil {
if errors.IsNotFound(err) {
var err error
if err = p.createPVC(&pet.pvcs[i]); err != nil {
errmsg += fmt.Sprintf("Failed to create %v: %v", pvc.Name, err)
}
p.event(pet.parent, "Create", fmt.Sprintf("pvc: %v", pvc.Name), err)
} else {
errmsg += fmt.Sprintf("Error trying to get pvc %v, %v.", pvc.Name, err)
}
}
// TODO: Check resource requirements and accessmodes, update if necessary
}
if len(errmsg) != 0 {
return fmt.Errorf("%v", errmsg)
}
return nil
}
// event formats an event for the given runtime object.
func (p *apiServerPetClient) event(obj runtime.Object, reason, msg string, err error) {
if err != nil {
p.recorder.Eventf(obj, v1.EventTypeWarning, fmt.Sprintf("Failed%v", reason), fmt.Sprintf("%v, error: %v", msg, err))
} else {
p.recorder.Eventf(obj, v1.EventTypeNormal, fmt.Sprintf("Successful%v", reason), msg)
}
}
// petHealthChecker is an interface to check pet health. It makes a boolean
// decision based on the given pod.
type petHealthChecker interface {
isHealthy(*v1.Pod) bool
isDying(*v1.Pod) bool
}
// defaultPetHealthChecker does basic health checking.
// It doesn't update, probe, or get the pod.
type defaultPetHealthChecker struct{}
// isHealthy returns true if the pod is ready & running. If the pod has the
// "pod.alpha.kubernetes.io/initialized" annotation set to "false", pod state is ignored.
func (d *defaultPetHealthChecker) isHealthy(pod *v1.Pod) bool {
if pod == nil || pod.Status.Phase != v1.PodRunning {
return false
}
podReady := v1.IsPodReady(pod)
// User may have specified a pod readiness override through a debug annotation.
initialized, ok := pod.Annotations[StatefulSetInitAnnotation]
if ok {
if initAnnotation, err := strconv.ParseBool(initialized); err != nil {
glog.V(4).Infof("Failed to parse %v annotation on pod %v: %v", StatefulSetInitAnnotation, pod.Name, err)
} else if !initAnnotation {
glog.V(4).Infof("StatefulSet pod %v waiting on annotation %v", pod.Name, StatefulSetInitAnnotation)
podReady = initAnnotation
}
}
return podReady
}
// isDying returns true if the pod has a non-nil deletion timestamp. Since that
// timestamp, once set, is never cleared, once this method returns true for a
// given pet it will never return false.
func (d *defaultPetHealthChecker) isDying(pod *v1.Pod) bool {
return pod != nil && pod.DeletionTimestamp != nil
}

View file

@ -0,0 +1,178 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package statefulset
import (
"fmt"
"net/http/httptest"
"testing"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
restclient "k8s.io/client-go/rest"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
"k8s.io/kubernetes/pkg/client/testing/core"
)
func newPetClient(client *clientset.Clientset) *apiServerPetClient {
return &apiServerPetClient{
c: client,
}
}
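// makeTwoDifferntPCB builds two control blocks for the same StatefulSet that
// differ only by an extra user-added volume on the second pod.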
func makeTwoDifferntPCB() (pcb1, pcb2 *pcb) {
userAdded := v1.Volume{
Name: "test",
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory},
},
}
ps := newStatefulSet(2)
pcb1, _ = newPCB("1", ps)
pcb2, _ = newPCB("2", ps)
pcb2.pod.Spec.Volumes = append(pcb2.pod.Spec.Volumes, userAdded)
return pcb1, pcb2
}
func TestUpdatePetWithoutRetry(t *testing.T) {
pcb1, pcb2 := makeTwoDifferntPCB()
// invalid pet with empty pod
invalidPcb := *pcb1
invalidPcb.pod = nil
testCases := []struct {
realPet *pcb
expectedPet *pcb
expectErr bool
requests int
}{
// case 0: error occurs, no need to update
{
realPet: pcb1,
expectedPet: &invalidPcb,
expectErr: true,
requests: 0,
},
// case 1: identical pet, no need to update
{
realPet: pcb1,
expectedPet: pcb1,
expectErr: false,
requests: 0,
},
// case 2: need to call update once
{
realPet: pcb1,
expectedPet: pcb2,
expectErr: false,
requests: 1,
},
}
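// Each case runs against a throwaway test server that returns a canned pod
// for every request, so the assertions below only check the error and the
// number of requests issued.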
for k, tc := range testCases {
body := runtime.EncodeOrDie(testapi.Default.Codec(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "empty_pod"}})
fakeHandler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: string(body),
}
testServer := httptest.NewServer(&fakeHandler)
client := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
petClient := newPetClient(client)
err := petClient.Update(tc.realPet, tc.expectedPet)
if tc.expectErr != (err != nil) {
t.Errorf("case %d: expect error(%v), got err: %v", k, tc.expectErr, err)
}
fakeHandler.ValidateRequestCount(t, tc.requests)
testServer.Close()
}
}
func TestUpdatePetWithFailure(t *testing.T) {
fakeHandler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "{}",
}
testServer := httptest.NewServer(&fakeHandler)
defer testServer.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
petClient := newPetClient(client)
pcb1, pcb2 := makeTwoDifferntPCB()
if err := petClient.Update(pcb1, pcb2); err == nil {
t.Errorf("expect error, got nil")
}
// 1 Update and 1 GET, both of which fail
fakeHandler.ValidateRequestCount(t, 2)
}
func TestUpdatePetRetrySucceed(t *testing.T) {
pcb1, pcb2 := makeTwoDifferntPCB()
fakeClient := &fake.Clientset{}
fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, pcb2.pod, nil
})
fakeClient.AddReactor("*", "*", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, fmt.Errorf("Fake error")
})
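// Reactors are matched in registration order: Get on pods is served by the
// first reactor, while the initial Update falls through to the catch-all and
// fails, so the controller issues exactly one Update followed by one Get.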
petClient := apiServerPetClient{
c: fakeClient,
}
if err := petClient.Update(pcb1, pcb2); err != nil {
t.Errorf("unexpected error: %v", err)
}
actions := fakeClient.Actions()
if len(actions) != 2 {
t.Errorf("Expect 2 actions, got %d actions", len(actions))
}
for i := 0; i < len(actions); i++ {
a := actions[i]
if a.GetResource().Resource != "pods" {
t.Errorf("Unexpected action %+v", a)
continue
}
switch action := a.(type) {
case core.GetAction:
if i%2 == 0 {
t.Errorf("Unexpected Get action")
}
// Make sure the get is for the right pod
if action.GetName() != pcb2.pod.Name {
t.Errorf("Expected get pod %v, got %q instead", pcb2.pod.Name, action.GetName())
}
case core.UpdateAction:
if i%2 == 1 {
t.Errorf("Unexpected Update action")
}
default:
t.Errorf("Unexpected action %+v", a)
break
}
}
}