2017-01-31 16:45:59 -08:00
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kubelet
import (
2017-02-03 14:41:32 +01:00
2017-01-31 16:45:59 -08:00
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
const (
// the mark-for-masquerade chain
// TODO: clean up this logic in kube-proxy
KubeMarkMasqChain utiliptables . Chain = "KUBE-MARK-MASQ"
// the mark-for-drop chain
KubeMarkDropChain utiliptables . Chain = "KUBE-MARK-DROP"
// kubernetes postrouting rules
KubePostroutingChain utiliptables . Chain = "KUBE-POSTROUTING"
// kubernetes firewall rules
KubeFirewallChain utiliptables . Chain = "KUBE-FIREWALL"
// effectiveHairpinMode determines the effective hairpin mode given the
// configured mode, container runtime, and whether cbr0 should be configured.
func effectiveHairpinMode ( hairpinMode componentconfig . HairpinMode , containerRuntime string , networkPlugin string ) ( componentconfig . HairpinMode , error ) {
// The hairpin mode setting doesn't matter if:
// - We're not using a bridge network. This is hard to check because we might
// be using a plugin.
// - It's set to hairpin-veth for a container runtime that doesn't know how
// to set the hairpin flag on the veth's of containers. Currently the
// docker runtime is the only one that understands this.
// - It's set to "none".
if hairpinMode == componentconfig . PromiscuousBridge || hairpinMode == componentconfig . HairpinVeth {
// Only on docker.
if containerRuntime != "docker" {
glog . Warningf ( "Hairpin mode set to %q but container runtime is %q, ignoring" , hairpinMode , containerRuntime )
return componentconfig . HairpinNone , nil
if hairpinMode == componentconfig . PromiscuousBridge && networkPlugin != "kubenet" {
// This is not a valid combination, since promiscuous-bridge only works on kubenet. Users might be using the
// default values (from before the hairpin-mode flag existed) and we
// should keep the old behavior.
glog . Warningf ( "Hairpin mode set to %q but kubenet is not enabled, falling back to %q" , hairpinMode , componentconfig . HairpinVeth )
return componentconfig . HairpinVeth , nil
} else if hairpinMode != componentconfig . HairpinNone {
return "" , fmt . Errorf ( "unknown value: %q" , hairpinMode )
return hairpinMode , nil
// providerRequiresNetworkingConfiguration returns whether the cloud provider
// requires special networking configuration.
func ( kl * Kubelet ) providerRequiresNetworkingConfiguration ( ) bool {
// TODO: We should have a mechanism to say whether native cloud provider
// is used or whether we are using overlay networking. We should return
// true for cloud providers if they implement Routes() interface and
// we are not using overlay networking.
if kl . cloud == nil || kl . cloud . ProviderName ( ) != "gce" {
return false
_ , supported := kl . cloud . Routes ( )
return supported
2017-02-03 14:41:32 +01:00
func omitDuplicates ( kl * Kubelet , pod * v1 . Pod , combinedSearch [ ] string ) [ ] string {
uniqueDomains := map [ string ] bool { }
for _ , dnsDomain := range combinedSearch {
if _ , exists := uniqueDomains [ dnsDomain ] ; ! exists {
combinedSearch [ len ( uniqueDomains ) ] = dnsDomain
uniqueDomains [ dnsDomain ] = true
} else {
log := fmt . Sprintf ( "Found and omitted duplicated dns domain in host search line: '%s' during merging with cluster dns domains" , dnsDomain )
kl . recorder . Event ( pod , v1 . EventTypeWarning , "DNSSearchForming" , log )
glog . Error ( log )
return combinedSearch [ : len ( uniqueDomains ) ]
func formDNSSearchFitsLimits ( kl * Kubelet , pod * v1 . Pod , composedSearch [ ] string ) [ ] string {
// resolver file Search line current limitations
resolvSearchLineDNSDomainsLimit := 6
resolvSearchLineLenLimit := 255
limitsExceeded := false
if len ( composedSearch ) > resolvSearchLineDNSDomainsLimit {
composedSearch = composedSearch [ : resolvSearchLineDNSDomainsLimit ]
limitsExceeded = true
if resolvSearchhLineStrLen := len ( strings . Join ( composedSearch , " " ) ) ; resolvSearchhLineStrLen > resolvSearchLineLenLimit {
cutDomainsNum := 0
cutDoaminsLen := 0
for i := len ( composedSearch ) - 1 ; i >= 0 ; i -- {
cutDoaminsLen += len ( composedSearch [ i ] ) + 1
cutDomainsNum ++
if ( resolvSearchhLineStrLen - cutDoaminsLen ) <= resolvSearchLineLenLimit {
composedSearch = composedSearch [ : ( len ( composedSearch ) - cutDomainsNum ) ]
limitsExceeded = true
if limitsExceeded {
log := fmt . Sprintf ( "Search Line limits were exceeded, some dns names have been omitted, the applied search line is: %s" , strings . Join ( composedSearch , " " ) )
kl . recorder . Event ( pod , v1 . EventTypeWarning , "DNSSearchForming" , log )
glog . Error ( log )
return composedSearch
func ( kl * Kubelet ) formDNSSearchForDNSDefault ( hostSearch [ ] string , pod * v1 . Pod ) [ ] string {
return formDNSSearchFitsLimits ( kl , pod , hostSearch )
func ( kl * Kubelet ) formDNSSearch ( hostSearch [ ] string , pod * v1 . Pod ) [ ] string {
if kl . clusterDomain == "" {
formDNSSearchFitsLimits ( kl , pod , hostSearch )
return hostSearch
nsSvcDomain := fmt . Sprintf ( "%s.svc.%s" , pod . Namespace , kl . clusterDomain )
svcDomain := fmt . Sprintf ( "svc.%s" , kl . clusterDomain )
dnsSearch := [ ] string { nsSvcDomain , svcDomain , kl . clusterDomain }
combinedSearch := append ( dnsSearch , hostSearch ... )
combinedSearch = omitDuplicates ( kl , pod , combinedSearch )
return formDNSSearchFitsLimits ( kl , pod , combinedSearch )
func ( kl * Kubelet ) checkLimitsForResolvConf ( ) {
// resolver file Search line current limitations
resolvSearchLineDNSDomainsLimit := 6
resolvSearchLineLenLimit := 255
f , err := os . Open ( kl . resolverConfig )
if err != nil {
kl . recorder . Event ( kl . nodeRef , v1 . EventTypeWarning , "checkLimitsForResolvConf" , err . Error ( ) )
glog . Error ( "checkLimitsForResolvConf: " + err . Error ( ) )
defer f . Close ( )
_ , hostSearch , err := kl . parseResolvConf ( f )
if err != nil {
kl . recorder . Event ( kl . nodeRef , v1 . EventTypeWarning , "checkLimitsForResolvConf" , err . Error ( ) )
glog . Error ( "checkLimitsForResolvConf: " + err . Error ( ) )
domainCntLimit := resolvSearchLineDNSDomainsLimit
if kl . clusterDomain != "" {
domainCntLimit -= 3
if len ( hostSearch ) > domainCntLimit {
log := fmt . Sprintf ( "Resolv.conf file '%s' contains search line consisting of more than %d domains!" , kl . resolverConfig , domainCntLimit )
kl . recorder . Event ( kl . nodeRef , v1 . EventTypeWarning , "checkLimitsForResolvConf" , log )
glog . Error ( "checkLimitsForResolvConf: " + log )
if len ( strings . Join ( hostSearch , " " ) ) > resolvSearchLineLenLimit {
log := fmt . Sprintf ( "Resolv.conf file '%s' contains search line which length is more than allowed %d chars!" , kl . resolverConfig , resolvSearchLineLenLimit )
kl . recorder . Event ( kl . nodeRef , v1 . EventTypeWarning , "checkLimitsForResolvConf" , log )
glog . Error ( "checkLimitsForResolvConf: " + log )
2017-01-31 16:45:59 -08:00
// parseResolveConf reads a resolv.conf file from the given reader, and parses
// it into nameservers and searches, possibly returning an error.
// TODO: move to utility package
func ( kl * Kubelet ) parseResolvConf ( reader io . Reader ) ( nameservers [ ] string , searches [ ] string , err error ) {
file , err := ioutil . ReadAll ( reader )
if err != nil {
return nil , nil , err
// Lines of the form "nameserver" accumulate.
nameservers = [ ] string { }
// Lines of the form "search example.com" overrule - last one wins.
searches = [ ] string { }
lines := strings . Split ( string ( file ) , "\n" )
for l := range lines {
trimmed := strings . TrimSpace ( lines [ l ] )
if strings . HasPrefix ( trimmed , "#" ) {
fields := strings . Fields ( trimmed )
if len ( fields ) == 0 {
if fields [ 0 ] == "nameserver" {
nameservers = append ( nameservers , fields [ 1 : ] ... )
if fields [ 0 ] == "search" {
searches = fields [ 1 : ]
// There used to be code here to scrub DNS for each cloud, but doesn't
// make sense anymore since cloudproviders are being factored out.
// contact @thockin or @wlan0 for more information
return nameservers , searches , nil
// cleanupBandwidthLimits updates the status of bandwidth-limited containers
// and ensures that only the appropriate CIDRs are active on the node.
func ( kl * Kubelet ) cleanupBandwidthLimits ( allPods [ ] * v1 . Pod ) error {
if kl . shaper == nil {
return nil
currentCIDRs , err := kl . shaper . GetCIDRs ( )
if err != nil {
return err
possibleCIDRs := sets . String { }
for ix := range allPods {
pod := allPods [ ix ]
ingress , egress , err := bandwidth . ExtractPodBandwidthResources ( pod . Annotations )
if err != nil {
return err
if ingress == nil && egress == nil {
glog . V ( 8 ) . Infof ( "Not a bandwidth limited container..." )
status , found := kl . statusManager . GetPodStatus ( pod . UID )
if ! found {
// TODO(random-liu): Cleanup status get functions. (issue #20477)
s , err := kl . containerRuntime . GetPodStatus ( pod . UID , pod . Name , pod . Namespace )
if err != nil {
return err
status = kl . generateAPIPodStatus ( pod , s )
if status . Phase == v1 . PodRunning {
possibleCIDRs . Insert ( fmt . Sprintf ( "%s/32" , status . PodIP ) )
for _ , cidr := range currentCIDRs {
if ! possibleCIDRs . Has ( cidr ) {
glog . V ( 2 ) . Infof ( "Removing CIDR: %s (%v)" , cidr , possibleCIDRs )
if err := kl . shaper . Reset ( cidr ) ; err != nil {
return err
return nil
// syncNetworkStatus updates the network state
func ( kl * Kubelet ) syncNetworkStatus ( ) {
// For cri integration, network state will be updated in updateRuntimeUp,
// we'll get runtime network status through cri directly.
// TODO: Remove this once we completely switch to cri integration.
if kl . networkPlugin != nil {
kl . runtimeState . setNetworkState ( kl . networkPlugin . Status ( ) )
// updatePodCIDR updates the pod CIDR in the runtime state if it is different
// from the current CIDR.
func ( kl * Kubelet ) updatePodCIDR ( cidr string ) {
podCIDR := kl . runtimeState . podCIDR ( )
if podCIDR == cidr {
// kubelet -> network plugin
// cri runtime shims are responsible for their own network plugins
if kl . networkPlugin != nil {
details := make ( map [ string ] interface { } )
kl . networkPlugin . Event ( network . NET_PLUGIN_EVENT_POD_CIDR_CHANGE , details )
// kubelet -> generic runtime -> runtime shim -> network plugin
// docker/rkt non-cri implementations have a passthrough UpdatePodCIDR
if err := kl . GetRuntime ( ) . UpdatePodCIDR ( cidr ) ; err != nil {
glog . Errorf ( "Failed to update pod CIDR: %v" , err )
glog . Infof ( "Setting Pod CIDR: %v -> %v" , podCIDR , cidr )
kl . runtimeState . setPodCIDR ( cidr )
// shapingEnabled returns whether traffic shaping is enabled.
func ( kl * Kubelet ) shapingEnabled ( ) bool {
// Disable shaping if a network plugin is defined and supports shaping
if kl . networkPlugin != nil && kl . networkPlugin . Capabilities ( ) . Has ( network . NET_PLUGIN_CAPABILITY_SHAPING ) {
return false
// This is not strictly true but we need to figure out how to handle
// bandwidth shaping anyway. If the kubelet doesn't have a networkPlugin,
// it could mean:
// a. the kubelet is responsible for bandwidth shaping
// b. the kubelet is using cri, and the cri has a network plugin
// Today, the only plugin that understands bandwidth shaping is kubenet, and
// it doesn't support bandwidth shaping when invoked through cri, so it
// effectively boils down to letting the kubelet decide how to handle
// shaping annotations. The combination of (cri + network plugin that
// handles bandwidth shaping) may not work because of this.
return true
// syncNetworkUtil ensures the network utility are present on host.
// Network util includes:
// 1. In nat table, KUBE-MARK-DROP rule to mark connections for dropping
// Marked connection will be drop on INPUT/OUTPUT Chain in filter table
// 2. In nat table, KUBE-MARK-MASQ rule to mark connections for SNAT
// Marked connection will get SNAT on POSTROUTING Chain in nat table
func ( kl * Kubelet ) syncNetworkUtil ( ) {
if kl . iptablesMasqueradeBit < 0 || kl . iptablesMasqueradeBit > 31 {
glog . Errorf ( "invalid iptables-masquerade-bit %v not in [0, 31]" , kl . iptablesMasqueradeBit )
if kl . iptablesDropBit < 0 || kl . iptablesDropBit > 31 {
glog . Errorf ( "invalid iptables-drop-bit %v not in [0, 31]" , kl . iptablesDropBit )
if kl . iptablesDropBit == kl . iptablesMasqueradeBit {
glog . Errorf ( "iptables-masquerade-bit %v and iptables-drop-bit %v must be different" , kl . iptablesMasqueradeBit , kl . iptablesDropBit )
// Setup KUBE-MARK-DROP rules
dropMark := getIPTablesMark ( kl . iptablesDropBit )
if _ , err := kl . iptClient . EnsureChain ( utiliptables . TableNAT , KubeMarkDropChain ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s exists: %v" , utiliptables . TableNAT , KubeMarkDropChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Append , utiliptables . TableNAT , KubeMarkDropChain , "-j" , "MARK" , "--set-xmark" , dropMark ) ; err != nil {
glog . Errorf ( "Failed to ensure marking rule for %v: %v" , KubeMarkDropChain , err )
if _ , err := kl . iptClient . EnsureChain ( utiliptables . TableFilter , KubeFirewallChain ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s exists: %v" , utiliptables . TableFilter , KubeFirewallChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Append , utiliptables . TableFilter , KubeFirewallChain ,
"-m" , "comment" , "--comment" , "kubernetes firewall for dropping marked packets" ,
"-m" , "mark" , "--mark" , dropMark ,
"-j" , "DROP" ) ; err != nil {
glog . Errorf ( "Failed to ensure rule to drop packet marked by %v in %v chain %v: %v" , KubeMarkDropChain , utiliptables . TableFilter , KubeFirewallChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Prepend , utiliptables . TableFilter , utiliptables . ChainOutput , "-j" , string ( KubeFirewallChain ) ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s jumps to %s: %v" , utiliptables . TableFilter , utiliptables . ChainOutput , KubeFirewallChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Prepend , utiliptables . TableFilter , utiliptables . ChainInput , "-j" , string ( KubeFirewallChain ) ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s jumps to %s: %v" , utiliptables . TableFilter , utiliptables . ChainInput , KubeFirewallChain , err )
// Setup KUBE-MARK-MASQ rules
masqueradeMark := getIPTablesMark ( kl . iptablesMasqueradeBit )
if _ , err := kl . iptClient . EnsureChain ( utiliptables . TableNAT , KubeMarkMasqChain ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s exists: %v" , utiliptables . TableNAT , KubeMarkMasqChain , err )
if _ , err := kl . iptClient . EnsureChain ( utiliptables . TableNAT , KubePostroutingChain ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s exists: %v" , utiliptables . TableNAT , KubePostroutingChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Append , utiliptables . TableNAT , KubeMarkMasqChain , "-j" , "MARK" , "--set-xmark" , masqueradeMark ) ; err != nil {
glog . Errorf ( "Failed to ensure marking rule for %v: %v" , KubeMarkMasqChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Prepend , utiliptables . TableNAT , utiliptables . ChainPostrouting ,
"-m" , "comment" , "--comment" , "kubernetes postrouting rules" , "-j" , string ( KubePostroutingChain ) ) ; err != nil {
glog . Errorf ( "Failed to ensure that %s chain %s jumps to %s: %v" , utiliptables . TableNAT , utiliptables . ChainPostrouting , KubePostroutingChain , err )
if _ , err := kl . iptClient . EnsureRule ( utiliptables . Append , utiliptables . TableNAT , KubePostroutingChain ,
"-m" , "comment" , "--comment" , "kubernetes service traffic requiring SNAT" ,
"-m" , "mark" , "--mark" , masqueradeMark , "-j" , "MASQUERADE" ) ; err != nil {
glog . Errorf ( "Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v" , KubeMarkMasqChain , utiliptables . TableNAT , KubePostroutingChain , err )
// getIPTablesMark returns the fwmark given the bit, formatted as
// "value/mask" where both are the same single-bit value written as a
// zero-padded hexadecimal number with a 0x prefix (for example bit 14 yields
// "0x00004000/0x00004000").
//
// bit is expected to be in [0, 31]. The value is computed as an unsigned
// 32-bit integer so that bit 31 is never formatted as a negative number on
// platforms where int is 32 bits wide.
func getIPTablesMark(bit int) string {
	value := uint32(1) << uint(bit)
	return fmt.Sprintf("%#08x/%#08x", value, value)
}