/* Copyright 2014 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package limitranger import ( "fmt" "io" "sort" "strings" "time" lru "github.com/hashicorp/golang-lru" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apiserver/pkg/admission" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" coreinternallisters "k8s.io/kubernetes/pkg/client/listers/core/internalversion" "k8s.io/kubernetes/pkg/controller/informers" kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission" ) const ( limitRangerAnnotation = "kubernetes.io/limit-ranger" ) func init() { admission.RegisterPlugin("LimitRanger", func(config io.Reader) (admission.Interface, error) { return NewLimitRanger(&DefaultLimitRangerActions{}) }) } // limitRanger enforces usage limits on a per resource basis in the namespace type limitRanger struct { *admission.Handler client internalclientset.Interface actions LimitRangerActions lister coreinternallisters.LimitRangeLister // liveLookups holds the last few live lookups we've done to help ammortize cost on repeated lookup failures. // This let's us handle the case of latent caches, by looking up actual results for a namespace on cache miss/no results. // We track the lookup result here so that for repeated requests, we don't look it up very often. liveLookupCache *lru.Cache liveTTL time.Duration } type liveLookupEntry struct { expiry time.Time items []*api.LimitRange } func (l *limitRanger) SetInformerFactory(f informers.SharedInformerFactory) { limitRangeInformer := f.InternalLimitRanges().Informer() l.SetReadyFunc(limitRangeInformer.HasSynced) l.lister = f.InternalLimitRanges().Lister() } func (l *limitRanger) Validate() error { if l.lister == nil { return fmt.Errorf("missing limitRange lister") } if l.client == nil { return fmt.Errorf("missing client") } return nil } // Admit admits resources into cluster that do not violate any defined LimitRange in the namespace func (l *limitRanger) Admit(a admission.Attributes) (err error) { if !l.actions.SupportsAttributes(a) { return nil } obj := a.GetObject() name := "Unknown" if obj != nil { name, _ = meta.NewAccessor().Name(obj) if len(name) == 0 { name, _ = meta.NewAccessor().GenerateName(obj) } } items, err := l.lister.LimitRanges(a.GetNamespace()).List(labels.Everything()) if err != nil { return admission.NewForbidden(a, fmt.Errorf("unable to %s %v at this time because there was an error enforcing limit ranges", a.GetOperation(), a.GetResource())) } // if there are no items held in our indexer, check our live-lookup LRU, if that misses, do the live lookup to prime it. if len(items) == 0 { lruItemObj, ok := l.liveLookupCache.Get(a.GetNamespace()) if !ok || lruItemObj.(liveLookupEntry).expiry.Before(time.Now()) { // TODO: If there are multiple operations at the same time and cache has just expired, // this may cause multiple List operations being issued at the same time. // If there is already in-flight List() for a given namespace, we should wait until // it is finished and cache is updated instead of doing the same, also to avoid // throttling - see #22422 for details. liveList, err := l.client.Core().LimitRanges(a.GetNamespace()).List(metav1.ListOptions{}) if err != nil { return admission.NewForbidden(a, err) } newEntry := liveLookupEntry{expiry: time.Now().Add(l.liveTTL)} for i := range liveList.Items { newEntry.items = append(newEntry.items, &liveList.Items[i]) } l.liveLookupCache.Add(a.GetNamespace(), newEntry) lruItemObj = newEntry } lruEntry := lruItemObj.(liveLookupEntry) for i := range lruEntry.items { items = append(items, lruEntry.items[i]) } } // ensure it meets each prescribed min/max for i := range items { limitRange := items[i] if !l.actions.SupportsLimit(limitRange) { continue } err = l.actions.Limit(limitRange, a.GetResource().Resource, a.GetObject()) if err != nil { return admission.NewForbidden(a, err) } } return nil } // NewLimitRanger returns an object that enforces limits based on the supplied limit function func NewLimitRanger(actions LimitRangerActions) (admission.Interface, error) { liveLookupCache, err := lru.New(10000) if err != nil { return nil, err } if actions == nil { actions = &DefaultLimitRangerActions{} } return &limitRanger{ Handler: admission.NewHandler(admission.Create, admission.Update), actions: actions, liveLookupCache: liveLookupCache, liveTTL: time.Duration(30 * time.Second), }, nil } var _ = kubeapiserveradmission.WantsInternalClientSet(&limitRanger{}) func (a *limitRanger) SetInternalClientSet(client internalclientset.Interface) { a.client = client } // defaultContainerResourceRequirements returns the default requirements for a container // the requirement.Limits are taken from the LimitRange defaults (if specified) // the requirement.Requests are taken from the LimitRange default request (if specified) func defaultContainerResourceRequirements(limitRange *api.LimitRange) api.ResourceRequirements { requirements := api.ResourceRequirements{} requirements.Requests = api.ResourceList{} requirements.Limits = api.ResourceList{} for i := range limitRange.Spec.Limits { limit := limitRange.Spec.Limits[i] if limit.Type == api.LimitTypeContainer { for k, v := range limit.DefaultRequest { value := v.Copy() requirements.Requests[k] = *value } for k, v := range limit.Default { value := v.Copy() requirements.Limits[k] = *value } } } return requirements } // mergeContainerResources handles defaulting all of the resources on a container. func mergeContainerResources(container *api.Container, defaultRequirements *api.ResourceRequirements, annotationPrefix string, annotations []string) []string { setRequests := []string{} setLimits := []string{} if container.Resources.Limits == nil { container.Resources.Limits = api.ResourceList{} } if container.Resources.Requests == nil { container.Resources.Requests = api.ResourceList{} } for k, v := range defaultRequirements.Limits { _, found := container.Resources.Limits[k] if !found { container.Resources.Limits[k] = *v.Copy() setLimits = append(setLimits, string(k)) } } for k, v := range defaultRequirements.Requests { _, found := container.Resources.Requests[k] if !found { container.Resources.Requests[k] = *v.Copy() setRequests = append(setRequests, string(k)) } } if len(setRequests) > 0 { sort.Strings(setRequests) a := strings.Join(setRequests, ", ") + fmt.Sprintf(" request for %s %s", annotationPrefix, container.Name) annotations = append(annotations, a) } if len(setLimits) > 0 { sort.Strings(setLimits) a := strings.Join(setLimits, ", ") + fmt.Sprintf(" limit for %s %s", annotationPrefix, container.Name) annotations = append(annotations, a) } return annotations } // mergePodResourceRequirements merges enumerated requirements with default requirements // it annotates the pod with information about what requirements were modified func mergePodResourceRequirements(pod *api.Pod, defaultRequirements *api.ResourceRequirements) { annotations := []string{} for i := range pod.Spec.Containers { annotations = mergeContainerResources(&pod.Spec.Containers[i], defaultRequirements, "container", annotations) } for i := range pod.Spec.InitContainers { annotations = mergeContainerResources(&pod.Spec.InitContainers[i], defaultRequirements, "init container", annotations) } if len(annotations) > 0 { if pod.ObjectMeta.Annotations == nil { pod.ObjectMeta.Annotations = make(map[string]string) } val := "LimitRanger plugin set: " + strings.Join(annotations, "; ") pod.ObjectMeta.Annotations[limitRangerAnnotation] = val } } // requestLimitEnforcedValues returns the specified values at a common precision to support comparability func requestLimitEnforcedValues(requestQuantity, limitQuantity, enforcedQuantity resource.Quantity) (request, limit, enforced int64) { request = requestQuantity.Value() limit = limitQuantity.Value() enforced = enforcedQuantity.Value() // do a more precise comparison if possible (if the value won't overflow) if request <= resource.MaxMilliValue && limit <= resource.MaxMilliValue && enforced <= resource.MaxMilliValue { request = requestQuantity.MilliValue() limit = limitQuantity.MilliValue() enforced = enforcedQuantity.MilliValue() } return } // minConstraint enforces the min constraint over the specified resource func minConstraint(limitType api.LimitType, resourceName api.ResourceName, enforced resource.Quantity, request api.ResourceList, limit api.ResourceList) error { req, reqExists := request[resourceName] lim, limExists := limit[resourceName] observedReqValue, observedLimValue, enforcedValue := requestLimitEnforcedValues(req, lim, enforced) if !reqExists { return fmt.Errorf("minimum %s usage per %s is %s. No request is specified.", resourceName, limitType, enforced.String()) } if observedReqValue < enforcedValue { return fmt.Errorf("minimum %s usage per %s is %s, but request is %s.", resourceName, limitType, enforced.String(), req.String()) } if limExists && (observedLimValue < enforcedValue) { return fmt.Errorf("minimum %s usage per %s is %s, but limit is %s.", resourceName, limitType, enforced.String(), lim.String()) } return nil } // maxRequestConstraint enforces the max constraint over the specified resource // use when specify LimitType resource doesn't recognize limit values func maxRequestConstraint(limitType api.LimitType, resourceName api.ResourceName, enforced resource.Quantity, request api.ResourceList) error { req, reqExists := request[resourceName] observedReqValue, _, enforcedValue := requestLimitEnforcedValues(req, resource.Quantity{}, enforced) if !reqExists { return fmt.Errorf("maximum %s usage per %s is %s. No request is specified.", resourceName, limitType, enforced.String()) } if observedReqValue > enforcedValue { return fmt.Errorf("maximum %s usage per %s is %s, but request is %s.", resourceName, limitType, enforced.String(), req.String()) } return nil } // maxConstraint enforces the max constraint over the specified resource func maxConstraint(limitType api.LimitType, resourceName api.ResourceName, enforced resource.Quantity, request api.ResourceList, limit api.ResourceList) error { req, reqExists := request[resourceName] lim, limExists := limit[resourceName] observedReqValue, observedLimValue, enforcedValue := requestLimitEnforcedValues(req, lim, enforced) if !limExists { return fmt.Errorf("maximum %s usage per %s is %s. No limit is specified.", resourceName, limitType, enforced.String()) } if observedLimValue > enforcedValue { return fmt.Errorf("maximum %s usage per %s is %s, but limit is %s.", resourceName, limitType, enforced.String(), lim.String()) } if reqExists && (observedReqValue > enforcedValue) { return fmt.Errorf("maximum %s usage per %s is %s, but request is %s.", resourceName, limitType, enforced.String(), req.String()) } return nil } // limitRequestRatioConstraint enforces the limit to request ratio over the specified resource func limitRequestRatioConstraint(limitType api.LimitType, resourceName api.ResourceName, enforced resource.Quantity, request api.ResourceList, limit api.ResourceList) error { req, reqExists := request[resourceName] lim, limExists := limit[resourceName] observedReqValue, observedLimValue, _ := requestLimitEnforcedValues(req, lim, enforced) if !reqExists || (observedReqValue == int64(0)) { return fmt.Errorf("%s max limit to request ratio per %s is %s, but no request is specified or request is 0.", resourceName, limitType, enforced.String()) } if !limExists || (observedLimValue == int64(0)) { return fmt.Errorf("%s max limit to request ratio per %s is %s, but no limit is specified or limit is 0.", resourceName, limitType, enforced.String()) } observedRatio := float64(observedLimValue) / float64(observedReqValue) displayObservedRatio := observedRatio maxLimitRequestRatio := float64(enforced.Value()) if enforced.Value() <= resource.MaxMilliValue { observedRatio = observedRatio * 1000 maxLimitRequestRatio = float64(enforced.MilliValue()) } if observedRatio > maxLimitRequestRatio { return fmt.Errorf("%s max limit to request ratio per %s is %s, but provided ratio is %f.", resourceName, limitType, enforced.String(), displayObservedRatio) } return nil } // sum takes the total of each named resource across all inputs // if a key is not in each input, then the output resource list will omit the key func sum(inputs []api.ResourceList) api.ResourceList { result := api.ResourceList{} keys := []api.ResourceName{} for i := range inputs { for k := range inputs[i] { keys = append(keys, k) } } for _, key := range keys { total, isSet := int64(0), true for i := range inputs { input := inputs[i] v, exists := input[key] if exists { if key == api.ResourceCPU { total = total + v.MilliValue() } else { total = total + v.Value() } } else { isSet = false } } if isSet { if key == api.ResourceCPU { result[key] = *(resource.NewMilliQuantity(total, resource.DecimalSI)) } else { result[key] = *(resource.NewQuantity(total, resource.DecimalSI)) } } } return result } // DefaultLimitRangerActions is the default implementation of LimitRangerActions. type DefaultLimitRangerActions struct{} // ensure DefaultLimitRangerActions implements the LimitRangerActions interface. var _ LimitRangerActions = &DefaultLimitRangerActions{} // Limit enforces resource requirements of incoming resources against enumerated constraints // on the LimitRange. It may modify the incoming object to apply default resource requirements // if not specified, and enumerated on the LimitRange func (d *DefaultLimitRangerActions) Limit(limitRange *api.LimitRange, resourceName string, obj runtime.Object) error { switch resourceName { case "pods": return PodLimitFunc(limitRange, obj.(*api.Pod)) case "persistentvolumeclaims": return PersistentVolumeClaimLimitFunc(limitRange, obj.(*api.PersistentVolumeClaim)) } return nil } // SupportsAttributes ignores all calls that do not deal with pod resources or storage requests (PVCs). // Also ignores any call that has a subresource defined. func (d *DefaultLimitRangerActions) SupportsAttributes(a admission.Attributes) bool { if a.GetSubresource() != "" { return false } return a.GetKind().GroupKind() == api.Kind("Pod") || a.GetKind().GroupKind() == api.Kind("PersistentVolumeClaim") } // SupportsLimit always returns true. func (d *DefaultLimitRangerActions) SupportsLimit(limitRange *api.LimitRange) bool { return true } // PersistentVolumeClaimLimitFunc enforces storage limits for PVCs. // Users request storage via pvc.Spec.Resources.Requests. Min/Max is enforced by an admin with LimitRange. // Claims will not be modified with default values because storage is a required part of pvc.Spec. // All storage enforced values *only* apply to pvc.Spec.Resources.Requests. func PersistentVolumeClaimLimitFunc(limitRange *api.LimitRange, pvc *api.PersistentVolumeClaim) error { var errs []error for i := range limitRange.Spec.Limits { limit := limitRange.Spec.Limits[i] limitType := limit.Type if limitType == api.LimitTypePersistentVolumeClaim { for k, v := range limit.Min { // normal usage of minConstraint. pvc.Spec.Resources.Limits is not recognized as user input if err := minConstraint(limitType, k, v, pvc.Spec.Resources.Requests, api.ResourceList{}); err != nil { errs = append(errs, err) } } for k, v := range limit.Max { // We want to enforce the max of the LimitRange against what // the user requested. if err := maxRequestConstraint(limitType, k, v, pvc.Spec.Resources.Requests); err != nil { errs = append(errs, err) } } } } return utilerrors.NewAggregate(errs) } // PodLimitFunc enforces resource requirements enumerated by the pod against // the specified LimitRange. The pod may be modified to apply default resource // requirements if not specified, and enumerated on the LimitRange func PodLimitFunc(limitRange *api.LimitRange, pod *api.Pod) error { var errs []error defaultResources := defaultContainerResourceRequirements(limitRange) mergePodResourceRequirements(pod, &defaultResources) for i := range limitRange.Spec.Limits { limit := limitRange.Spec.Limits[i] limitType := limit.Type // enforce container limits if limitType == api.LimitTypeContainer { for j := range pod.Spec.Containers { container := &pod.Spec.Containers[j] for k, v := range limit.Min { if err := minConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } for k, v := range limit.Max { if err := maxConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } for k, v := range limit.MaxLimitRequestRatio { if err := limitRequestRatioConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } } for j := range pod.Spec.InitContainers { container := &pod.Spec.InitContainers[j] for k, v := range limit.Min { if err := minConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } for k, v := range limit.Max { if err := maxConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } for k, v := range limit.MaxLimitRequestRatio { if err := limitRequestRatioConstraint(limitType, k, v, container.Resources.Requests, container.Resources.Limits); err != nil { errs = append(errs, err) } } } } // enforce pod limits on init containers if limitType == api.LimitTypePod { containerRequests, containerLimits := []api.ResourceList{}, []api.ResourceList{} for j := range pod.Spec.Containers { container := &pod.Spec.Containers[j] containerRequests = append(containerRequests, container.Resources.Requests) containerLimits = append(containerLimits, container.Resources.Limits) } podRequests := sum(containerRequests) podLimits := sum(containerLimits) for j := range pod.Spec.InitContainers { container := &pod.Spec.InitContainers[j] // take max(sum_containers, any_init_container) for k, v := range container.Resources.Requests { if v2, ok := podRequests[k]; ok { if v.Cmp(v2) > 0 { podRequests[k] = v } } else { podRequests[k] = v } } for k, v := range container.Resources.Limits { if v2, ok := podLimits[k]; ok { if v.Cmp(v2) > 0 { podLimits[k] = v } } else { podLimits[k] = v } } } for k, v := range limit.Min { if err := minConstraint(limitType, k, v, podRequests, podLimits); err != nil { errs = append(errs, err) } } for k, v := range limit.Max { if err := maxConstraint(limitType, k, v, podRequests, podLimits); err != nil { errs = append(errs, err) } } for k, v := range limit.MaxLimitRequestRatio { if err := limitRequestRatioConstraint(limitType, k, v, podRequests, podLimits); err != nil { errs = append(errs, err) } } } } return utilerrors.NewAggregate(errs) }