#!/bin/bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# !!!EXPERIMENTAL !!! Upgrade script for GCE. Expect this to get
# rewritten in Go in relatively short order, but it allows us to start
# testing the concepts.

set -o errexit
set -o nounset
set -o pipefail

# Refuse to run against any provider other than GCE. The script name (${0})
# is printed; a positional parameter must not be used here because it is
# unset (and fatal under nounset) when the script is run without arguments.
if [[ "${KUBERNETES_PROVIDER:-gce}" != "gce" ]]; then
  echo "!!! ${0} only works on GCE" >&2
  exit 1
fi

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/cluster/kube-util.sh"

# Prints the command-line help, then fetches (via gsutil) and prints the
# versions currently published as release/stable, release/latest and
# ci/latest.
function usage() {
  echo "!!! EXPERIMENTAL !!!"
  echo ""
  echo "${0} [-M | -N | -P] [-o] (-l | <version number or publication>)"
  echo " Upgrades master and nodes by default"
  echo " -M: Upgrade master only"
  echo " -N: Upgrade nodes only"
  echo " -P: Node upgrade prerequisites only (create a new instance template)"
  echo " -o: Use os distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
  echo " -l: Use local(dev) binaries. This is only supported for master upgrades."
  echo ""
  echo ' Version number or publication is either a proper version number'
  echo ' (e.g. "v1.0.6", "v1.2.0-alpha.1.881+376438b69c7612") or a version'
  echo ' publication of the form <bucket>/<version> (e.g. "release/stable",'
  echo ' "ci/latest-1"). Some common ones are:'
  echo ' - "release/stable"'
  echo ' - "release/latest"'
  echo ' - "ci/latest"'
  echo ' See the docs on getting builds for more information about version publication.'
  echo ""
  echo "(... Fetching current release versions ...)"
  echo ""

  # NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE test/e2e/cluster_upgrade.go
  local release_stable
  local release_latest
  local ci_latest

  release_stable=$(gsutil cat gs://kubernetes-release/release/stable.txt)
  release_latest=$(gsutil cat gs://kubernetes-release/release/latest.txt)
  ci_latest=$(gsutil cat gs://kubernetes-release-dev/ci/latest.txt)

  echo "Right now, versions are as follows:"
  echo " release/stable: ${0} ${release_stable}"
  echo " release/latest: ${0} ${release_latest}"
  echo " ci/latest: ${0} ${ci_latest}"
}

# Prints the name, OS image and kubelet version of every node, as reported
# by kubectl.
#
# $1: Label used in the printed heading (e.g. "Pre-Upgrade").
function print-node-version-info() {
  echo "== $1 Node OS and Kubelet Versions =="
  "${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o=jsonpath='{range .items[*]}name: "{.metadata.name}", osImage: "{.status.nodeInfo.osImage}", kubeletVersion: "{.status.nodeInfo.kubeletVersion}"{"\n"}{end}'
}

# Deletes the current master instance, creates a new one and waits until it
# answers API requests.
#
# Assumed vars:
#   SERVER_BINARY_TAR_URL
#   PROJECT
#   ZONE
#   MASTER_NAME
function upgrade-master() {
  echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. =="

  # Tries to figure out KUBE_USER/KUBE_PASSWORD by first looking under
  # kubeconfig:username, and then under kubeconfig:username-basic-auth.
  # TODO: KUBE_USER is used in generating ABAC policy which the
  # apiserver may not have enabled. If it's enabled, we must have a user
  # to generate a valid ABAC policy. If the username changes, should
  # the script fail? Should we generate a default username and password
  # if the section is missing in kubeconfig? Handle this better in 1.5.
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  detect-master
  parse-master-env

  # Delete the master instance. Note that the master-pd is created
  # with auto-delete=no, so it should not be deleted.
  gcloud compute instances delete \
    --project "${PROJECT}" \
    --quiet \
    --zone "${ZONE}" \
    "${MASTER_NAME}"

  create-master-instance "${MASTER_NAME}-ip"
  wait-for-master
}

# Polls https://${KUBE_MASTER_IP}/healthz every two seconds until it
# succeeds. Authenticates with KUBE_BEARER_TOKEN when set, otherwise with
# KUBE_USER/KUBE_PASSWORD; exits with status 1 when neither is available.
#
# Assumed vars:
#   KUBE_MASTER_IP
#   KUBE_BEARER_TOKEN or KUBE_USER + KUBE_PASSWORD
function wait-for-master() {
  echo "== Waiting for new master to respond to API requests =="

  local curl_auth_arg
  if [[ -n ${KUBE_BEARER_TOKEN:-} ]]; then
    curl_auth_arg=(-H "Authorization: Bearer ${KUBE_BEARER_TOKEN}")
  elif [[ -n ${KUBE_PASSWORD:-} ]]; then
    curl_auth_arg=(--user "${KUBE_USER}:${KUBE_PASSWORD}")
  else
    echo "can't get auth credentials for the current master" >&2
    exit 1
  fi

  until curl --insecure "${curl_auth_arg[@]}" --max-time 5 \
    --fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/healthz"; do
    printf "."
    sleep 2
  done

  echo "== Done =="
}

# Perform common upgrade setup tasks
#
# Assumed vars
#   KUBE_VERSION
function prepare-upgrade() {
  ensure-temp-dir
  detect-project
  detect-node-names # sets INSTANCE_GROUPS
  write-cluster-name
  tars_from_version
}

# Reads kube-env metadata from first node in NODE_NAMES.
#
# Assumed vars:
#   NODE_NAMES
#   PROJECT
#   ZONE
function get-node-env() {
  # TODO(zmerlynn): Make this more reliable with retries.
  gcloud compute --project "${PROJECT}" ssh --zone "${ZONE}" "${NODE_NAMES[0]}" --command \
    "curl --fail --silent -H 'Metadata-Flavor: Google' \
      'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
}

# Read os distro information from /etc/os-release on node.
# $1: The name of node
#
# Assumed vars:
#   PROJECT
#   ZONE
function get-node-os() {
  gcloud compute ssh "$1" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --command \
    "cat /etc/os-release | grep \"^ID=.*\" | cut -c 4-"
}

# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   KUBELET_TOKEN
#   KUBE_PROXY_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function upgrade-nodes() {
  prepare-node-upgrade
  do-node-upgrade
}

# Unless -o was given (env_os_distro=true), detects the OS distribution of
# the first node in NODE_NAMES, sources the matching node-helper.sh and
# resets the node image accordingly.
#
# Assumed vars:
#   env_os_distro
#   NODE_NAMES
#   KUBE_ROOT
#
# Vars set:
#   NODE_OS_DISTRIBUTION
function setup-base-image() {
  if [[ "${env_os_distro}" == "false" ]]; then
    echo "== Ensuring that new Node base OS image matched the existing Node base OS image"
    NODE_OS_DISTRIBUTION=$(get-node-os "${NODE_NAMES[0]}")
    source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
    # Reset the node image based on current os distro
    set-node-image
  fi
}

# prepare-node-upgrade creates a new instance template suitable for upgrading
# to KUBE_VERSION and echos a single line with the name of the new template.
#
# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   SANITIZED_VERSION
#   INSTANCE_GROUPS
#   KUBELET_TOKEN
#   KUBE_PROXY_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
#   KUBELET_AUTH_CA_CERT_BASE64
function prepare-node-upgrade() {
  echo "== Preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  setup-base-image

  SANITIZED_VERSION=$(echo "${KUBE_VERSION}" | sed 's/[\.\+]/-/g')

  # TODO(zmerlynn): Refactor setting scope flags.
  # NOTE(review): scope_flags is not referenced again in this function; it is
  # kept because functions called below can see this local — confirm whether
  # create-node-instance-template still reads it.
  local scope_flags=
  if [ -n "${NODE_SCOPES}" ]; then
    scope_flags="--scopes ${NODE_SCOPES}"
  else
    scope_flags="--no-scopes"
  fi

  # Get required node env vars from existing template.
  # NOTE(review): a failure to fetch the env has never aborted the upgrade
  # here (the status was masked by `local var=$(...)`); that tolerance is kept
  # and made explicit. Consider making it fatal once callers are verified.
  local node_env
  node_env=$(get-node-env) || true
  KUBELET_TOKEN=$(get-env-val "${node_env}" "KUBELET_TOKEN")
  KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
  CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
  EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
  KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
  KUBELET_KEY_BASE64=$(get-env-val "${node_env}" "KUBELET_KEY")

  local master_env
  master_env=$(get-master-env) || true
  KUBELET_AUTH_CA_CERT_BASE64=$(get-env-val "${master_env}" "KUBELET_AUTH_CA_CERT")

  # TODO(zmerlynn): How do we ensure kube-env is written in a ${version}-
  #                 compatible way?
  write-node-env

  # TODO(zmerlynn): Get configure-vm script from ${version}. (Must plumb this
  #                 through all create-node-instance-template implementations).
  local template_name
  template_name=$(get-template-name-from-version "${SANITIZED_VERSION}") || true
  create-node-instance-template "${template_name}"
  # The following is echo'd so that callers can get the template name.
  echo "Instance template name: ${template_name}"
  echo "== Finished preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
}

# Starts a rolling update to the new instance template for every managed
# instance group, waits for each update to reach ROLLED_OUT, then deletes the
# templates the groups used before. Returns the gcloud exit status if a
# rolling update cannot be started.
#
# Prereqs:
# - prepare-node-upgrade should have been called successfully
#
# Assumed vars:
#   KUBE_VERSION
#   SANITIZED_VERSION
#   INSTANCE_GROUPS
#   PROJECT
#   ZONE
function do-node-upgrade() {
  echo "== Upgrading nodes to ${KUBE_VERSION}. ==" >&2
  # Do the actual upgrade.
  # NOTE(zmerlynn): If you are changing this gcloud command, update
  #                 test/e2e/cluster_upgrade.go to match this EXACTLY.
  local template_name
  template_name=$(get-template-name-from-version "${SANITIZED_VERSION}") || true
  local old_templates=()
  local updates=()
  # Scratch variables are local so they no longer leak into the caller.
  local group update_output update_rc update_id result tmpl

  for group in "${INSTANCE_GROUPS[@]}"; do
    # Word splitting is intentional: each listed template becomes one element.
    old_templates+=($(gcloud compute instance-groups managed list \
        --project="${PROJECT}" \
        --zones="${ZONE}" \
        --regexp="${group}" \
        --format='value(instanceTemplate)' || true))
    echo "== Calling rolling-update for ${group}. ==" >&2
    # Capture both output and exit status without tripping errexit.
    update_rc=0
    update_output=$(gcloud alpha compute rolling-updates \
        --project="${PROJECT}" \
        --zone="${ZONE}" \
        start \
        --group="${group}" \
        --template="${template_name}" \
        --instance-startup-timeout=300s \
        --max-num-concurrent-instances=1 \
        --max-num-failed-instances=0 \
        --min-instance-update-time=0s 2>&1) || update_rc=$?

    if [[ "${update_rc}" != 0 ]]; then
      echo "== FAILED to start rolling-update: =="
      echo "${update_output}"
      echo "  This may be due to a preexisting rolling-update;"
      echo "  see https://github.com/kubernetes/kubernetes/issues/33113 for details."
      echo "  All rolling-updates in project ${PROJECT} zone ${ZONE}:"
      gcloud alpha compute rolling-updates \
        --project="${PROJECT}" \
        --zone="${ZONE}" \
        list || true
      return "${update_rc}"
    fi

    # The update id is the 11th '/'-separated field of the "Started [...]"
    # line, up to the closing bracket.
    update_id=$(echo "${update_output}" | grep "Started" | cut -d '/' -f 11 | cut -d ']' -f 1)
    updates+=("${update_id}")
  done

  echo "== Waiting for Upgrading nodes to be finished. ==" >&2
  # Wait until rolling updates are finished.
  for update_id in "${updates[@]}"; do
    # An empty id was silently skipped by the previous unquoted expansion;
    # keep skipping it rather than polling a nonexistent update forever.
    [[ -n "${update_id}" ]] || continue
    while true; do
      result=$(gcloud alpha compute rolling-updates \
          --project="${PROJECT}" \
          --zone="${ZONE}" \
          describe \
          "${update_id}" \
          --format='value(status)' || true)
      # Quoted [[ ]] test: result is empty when the describe call fails, which
      # previously made the unquoted `[` test a syntax error.
      if [[ "${result}" == "ROLLED_OUT" ]]; then
        echo "Rolling update ${update_id} is ${result} state and finished."
        break
      fi
      echo "Rolling update ${update_id} is still in ${result} state."
      sleep 10
    done
  done

  # Remove the old templates.
  echo "== Deleting old templates in ${PROJECT}. ==" >&2
  for tmpl in "${old_templates[@]}"; do
    gcloud compute instance-templates delete \
        --quiet \
        --project="${PROJECT}" \
        "${tmpl}" || true
  done

  echo "== Finished upgrading nodes to ${KUBE_VERSION}. ==" >&2
}

# Option defaults: upgrade both master and nodes, from a published version,
# keeping the nodes' current OS distribution.
master_upgrade=true
node_upgrade=true
node_prereqs=false
local_binaries=false
env_os_distro=false

while getopts ":MNPlho" opt; do
  case ${opt} in
    M)
      node_upgrade=false
      ;;
    N)
      master_upgrade=false
      ;;
    P)
      node_prereqs=true
      ;;
    l)
      local_binaries=true
      ;;
    o)
      env_os_distro=true
      ;;
    h)
      usage
      exit 0
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))

if [[ $# -gt 1 ]]; then
  echo "Error: Only one parameter (<version number or publication>) may be passed after the set of flags!" >&2
  usage
  exit 1
fi

# A version argument is mandatory unless local binaries (-l) are used.
if [[ $# -lt 1 ]] && [[ "${local_binaries}" == "false" ]]; then
  usage
  exit 1
fi

if [[ "${master_upgrade}" == "false" ]] && [[ "${node_upgrade}" == "false" ]]; then
  echo "Can't specify both -M and -N" >&2
  exit 1
fi

print-node-version-info "Pre-Upgrade"

if [[ "${local_binaries}" == "false" ]]; then
  set_binary_version "${1}"
fi

prepare-upgrade

# -P: only create the new instance template, then stop.
if [[ "${node_prereqs}" == "true" ]]; then
  prepare-node-upgrade
  exit 0
fi

if [[ "${master_upgrade}" == "true" ]]; then
  upgrade-master
fi

if [[ "${node_upgrade}" == "true" ]]; then
  if [[ "${local_binaries}" == "true" ]]; then
    echo "Upgrading nodes to local binaries is not yet supported." >&2
    exit 1
  else
    upgrade-nodes
  fi
fi

echo "== Validating cluster post-upgrade =="
"${KUBE_ROOT}/cluster/validate-cluster.sh"

print-node-version-info "Post-Upgrade"