8e5b17cf13
Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
79 lines
3.3 KiB
Go
79 lines
3.3 KiB
Go
/*
|
|
Copyright 2015 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package qos
|
|
|
|
import (
|
|
"k8s.io/kubernetes/pkg/api/v1"
|
|
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
|
)
|
|
|
|
const (
	// PodInfraOOMAdj is the OOM score adjustment for the pod infra container.
	// It is very docker specific: for an arbitrary runtime it may not make sense
	// to set a sandbox level oom score, e.g. a sandbox could be only a namespace
	// without a process.
	// TODO: Handle infra container oom score adj in a runtime agnostic way.
	PodInfraOOMAdj int = -998

	// CriticalPodOOMAdj is the OOM score adjustment that
	// GetContainerOOMScoreAdjust returns for containers of critical pods.
	// TODO: Should handle critical pod oom score adj with a proper preemption priority.
	// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
	CriticalPodOOMAdj int = -998

	// OOM score adjustments for node components; judging by the names these are
	// meant for the kubelet, docker and kube-proxy processes respectively (their
	// consumers live outside this file).
	KubeletOOMScoreAdj   int = -999
	DockerOOMScoreAdj    int = -999
	KubeProxyOOMScoreAdj int = -999

	// guaranteedOOMScoreAdj and besteffortOOMScoreAdj are the adjustments that
	// GetContainerOOMScoreAdjust returns for containers of Guaranteed and
	// BestEffort pods; they also bound the value computed for all other pods.
	guaranteedOOMScoreAdj int = -998
	besteffortOOMScoreAdj int = 1000
)
|
|
|
|
// GetContainerOOMAdjust returns the amount by which the OOM score of all processes in the
|
|
// container should be adjusted.
|
|
// The OOM score of a process is the percentage of memory it consumes
|
|
// multiplied by 10 (barring exceptional cases) + a configurable quantity which is between -1000
|
|
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
|
|
// See https://lwn.net/Articles/391222/ for more information.
|
|
func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
|
|
if kubetypes.IsCriticalPod(pod) {
|
|
return CriticalPodOOMAdj
|
|
}
|
|
|
|
switch GetPodQOS(pod) {
|
|
case v1.PodQOSGuaranteed:
|
|
// Guaranteed containers should be the last to get killed.
|
|
return guaranteedOOMScoreAdj
|
|
case v1.PodQOSBestEffort:
|
|
return besteffortOOMScoreAdj
|
|
}
|
|
|
|
// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
|
|
// we want to protect Burstable containers that consume less memory than requested.
|
|
// The formula below is a heuristic. A container requesting for 10% of a system's
|
|
// memory will have an OOM score adjust of 900. If a process in container Y
|
|
// uses over 10% of memory, its OOM score will be 1000. The idea is that containers
|
|
// which use more than their request will have an OOM score of 1000 and will be prime
|
|
// targets for OOM kills.
|
|
// Note that this is a heuristic, it won't work if a container has many small processes.
|
|
memoryRequest := container.Resources.Requests.Memory().Value()
|
|
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
|
|
// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
|
|
// that burstable pods have a higher OOM score adjustment.
|
|
if int(oomScoreAdjust) < (1000 + guaranteedOOMScoreAdj) {
|
|
return (1000 + guaranteedOOMScoreAdj)
|
|
}
|
|
// Give burstable pods a higher chance of survival over besteffort pods.
|
|
if int(oomScoreAdjust) == besteffortOOMScoreAdj {
|
|
return int(oomScoreAdjust - 1)
|
|
}
|
|
return int(oomScoreAdjust)
|
|
}
|