#!/usr/bin/env bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# !!! EXPERIMENTAL !!! Upgrade script for GCE. Expect this to get
# rewritten in Go in relatively short order, but it allows us to start
# testing the concepts.
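
# Fail fast: abort on any command error, on use of an unset variable,
# and on a failure anywhere in a pipeline.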
set -o errexit
set -o nounset
set -o pipefail

if [[ "${KUBERNETES_PROVIDER:-gce}" != "gce" ]]; then
  echo "!!! ${0} only works on GCE" >&2
  exit 1
fi

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..

source "${KUBE_ROOT}/hack/lib/util.sh"
source "${KUBE_ROOT}/cluster/kube-util.sh"

function usage() {
  echo "!!! EXPERIMENTAL !!!"
  echo ""
  echo "${0} [-M | -N | -P] [-c] [-o] (-l | <version number or publication>)"
  echo "  Upgrades master and nodes by default"
  echo "  -M:  Upgrade master only"
  echo "  -N:  Upgrade nodes only"
  echo "  -P:  Node upgrade prerequisites only (create a new instance template)"
  echo "  -c:  Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are handled serially."
  echo "  -o:  Use the OS distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
  echo "  -l:  Use local (dev) binaries. This is only supported for master upgrades."
  echo ""
  echo '  Version number or publication is either a proper version number'
  echo '  (e.g. "v1.0.6", "v1.2.0-alpha.1.881+376438b69c7612") or a version'
  echo '  publication of the form <bucket>/<version> (e.g. "release/stable",'
  echo '  "ci/latest-1"). Some common ones are:'
  echo '    - "release/stable"'
  echo '    - "release/latest"'
  echo '    - "ci/latest"'
  echo '  See the docs on getting builds for more information about version publication.'
  echo ""
  echo "(... Fetching current release versions ...)"
  echo ""

  # NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE test/e2e/cluster_upgrade.go
  local release_stable
  local release_latest
  local ci_latest

  release_stable=$(gsutil cat gs://kubernetes-release/release/stable.txt)
  release_latest=$(gsutil cat gs://kubernetes-release/release/latest.txt)
  ci_latest=$(gsutil cat gs://kubernetes-release-dev/ci/latest.txt)

  echo "Right now, versions are as follows:"
  echo "  release/stable: ${0} ${release_stable}"
  echo "  release/latest: ${0} ${release_latest}"
  echo "  ci/latest:      ${0} ${ci_latest}"
}
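
# Example invocations (illustrative; paths relative to the repo root):
#   cluster/gce/upgrade.sh release/stable   # upgrade master and nodes to the current stable release
#   cluster/gce/upgrade.sh -M v1.0.6        # upgrade only the master to v1.0.6
#   cluster/gce/upgrade.sh -N ci/latest     # upgrade only the nodes to the latest CI build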

function print-node-version-info() {
  echo "== $1 Node OS and Kubelet Versions =="
  "${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o=jsonpath='{range .items[*]}name: "{.metadata.name}", osImage: "{.status.nodeInfo.osImage}", kubeletVersion: "{.status.nodeInfo.kubeletVersion}"{"\n"}{end}'
}

function upgrade-master() {
  local num_masters
  num_masters=$(get-master-replicas-count)
  if [[ "${num_masters}" -gt 1 ]]; then
    echo "Upgrade of master not supported if more than one master replica present. The current number of master replicas: ${num_masters}"
    exit 1
  fi

  echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. =="

  # Tries to figure out KUBE_USER/KUBE_PASSWORD by first looking under
  # kubeconfig:username, and then under kubeconfig:username-basic-auth.
  # TODO: KUBE_USER is used in generating ABAC policy which the
  # apiserver may not have enabled. If it's enabled, we must have a user
  # to generate a valid ABAC policy. If the username changes, should
  # the script fail? Should we generate a default username and password
  # if the section is missing in kubeconfig? Handle this better in 1.5.
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  detect-master
  parse-master-env
  upgrade-master-env

  # Delete the master instance. Note that the master-pd is created
  # with auto-delete=no, so it should not be deleted.
  gcloud compute instances delete \
    --project "${PROJECT}" \
    --quiet \
    --zone "${ZONE}" \
    "${MASTER_NAME}"

  create-master-instance "${MASTER_NAME}-ip"
  wait-for-master
}
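
# Updates master environment variables that must change across versions.
# Currently this only generates NODE_PROBLEM_DETECTOR_TOKEN when the node
# problem detector runs standalone and the old master had no token.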
function upgrade-master-env() {
  echo "== Upgrading master environment variables. =="
  # Generate the node problem detector token if it isn't present on the original
  # master.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
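    # 128 random bytes -> base64 -> strip '=', '+', '/' -> keep the first
    # 32 bytes, yielding a 32-character alphanumeric token.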
    NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  fi
}
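
# Polls https://${KUBE_MASTER_IP}/healthz until the new master responds.
#
# Assumed vars:
#   KUBE_MASTER_IP
#   KUBE_BEARER_TOKEN or KUBE_USER/KUBE_PASSWORD (for the auth header)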
function wait-for-master() {
  echo "== Waiting for new master to respond to API requests =="

  local curl_auth_arg
  if [[ -n ${KUBE_BEARER_TOKEN:-} ]]; then
    curl_auth_arg=(-H "Authorization: Bearer ${KUBE_BEARER_TOKEN}")
  elif [[ -n ${KUBE_PASSWORD:-} ]]; then
    curl_auth_arg=(--user "${KUBE_USER}:${KUBE_PASSWORD}")
  else
    echo "can't get auth credentials for the current master"
    exit 1
  fi

  until curl --insecure "${curl_auth_arg[@]}" --max-time 5 \
    --fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/healthz"; do
    printf "."
    sleep 2
  done

  echo "== Done =="
}

# Perform common upgrade setup tasks
#
# Assumed vars
#   KUBE_VERSION
function prepare-upgrade() {
  kube::util::ensure-temp-dir
  detect-project
  detect-subnetworks
  detect-node-names # sets INSTANCE_GROUPS
  write-cluster-location
  write-cluster-name
  tars_from_version
}

# Reads kube-env metadata from the first node in NODE_NAMES.
#
# Assumed vars:
#   NODE_NAMES
#   PROJECT
#   ZONE
function get-node-env() {
  # TODO(zmerlynn): Make this more reliable with retries.
  gcloud compute --project "${PROJECT}" ssh --zone "${ZONE}" "${NODE_NAMES[0]}" --command \
    "curl --fail --silent -H 'Metadata-Flavor: Google' \
      'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
}

# Reads the OS distro information from /etc/os-release on a node.
# $1: The name of the node
#
# Assumed vars:
#   PROJECT
#   ZONE
function get-node-os() {
  gcloud compute ssh "$1" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --command \
    "cat /etc/os-release | grep \"^ID=.*\" | cut -c 4-"
}

# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function upgrade-nodes() {
  prepare-node-upgrade
  do-node-upgrade
}
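
# Ensures new nodes use the same base OS distro as the existing nodes,
# unless -o was passed (env_os_distro=true), in which case
# KUBE_NODE_OS_DISTRIBUTION is honored as-is.
#
# Assumed vars:
#   env_os_distro
#   NODE_NAMES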
function setup-base-image() {
  if [[ "${env_os_distro}" == "false" ]]; then
    echo "== Ensuring that the new node base OS image matches the existing node base OS image =="
    NODE_OS_DISTRIBUTION=$(get-node-os "${NODE_NAMES[0]}")
    if [[ "${NODE_OS_DISTRIBUTION}" == "cos" ]]; then
      NODE_OS_DISTRIBUTION="gci"
    fi
    source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
    # Reset the node image based on the current OS distro
    set-linux-node-image
  fi
}

# prepare-node-upgrade creates a new instance template suitable for upgrading
# to KUBE_VERSION and echoes a single line with the name of the new template.
#
# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   SANITIZED_VERSION
#   INSTANCE_GROUPS
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function prepare-node-upgrade() {
  echo "== Preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  setup-base-image
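
  # GCE resource names cannot contain '.' or '+', so map them to '-'
  # (e.g. "v1.2.0+abc" becomes "v1-2-0-abc").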
  SANITIZED_VERSION=$(echo "${KUBE_VERSION}" | sed 's/[\.\+]/-/g')

  # TODO(zmerlynn): Refactor setting scope flags.
  local scope_flags=
  if [ -n "${NODE_SCOPES}" ]; then
    scope_flags="--scopes ${NODE_SCOPES}"
  else
    scope_flags="--no-scopes"
  fi

  # Get required node env vars from the existing template.
  local node_env=$(get-node-env)
  KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
  NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${node_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
  EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
  KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
  KUBELET_KEY_BASE64=$(get-env-val "${node_env}" "KUBELET_KEY")

  upgrade-node-env

  # TODO(zmerlynn): How do we ensure kube-env is written in a ${version}-
  # compatible way?
  write-linux-node-env

  # TODO(zmerlynn): Get configure-vm script from ${version}. (Must plumb this
  # through all create-linux-node-instance-template implementations).
  local template_name=$(get-template-name-from-version "${SANITIZED_VERSION}" "${NODE_INSTANCE_PREFIX}")
  create-linux-node-instance-template "${template_name}"
  # The following is echo'd so that callers can get the template name.
  echo "Instance template name: ${template_name}"
  echo "== Finished preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
}

function upgrade-node-env() {
  echo "== Upgrading node environment variables. =="
  # Get the node problem detector token from master if it isn't present on
  # the original node.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
    detect-master
    local master_env=$(get-master-env)
    NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  fi
}

# Upgrades a single node.
# $1: The name of the node
#
# Note: This is called multiple times from do-node-upgrade() in parallel, so it
# should be thread-safe.
function do-single-node-upgrade() {
  local -r instance="$1"
  local kubectl_rc
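  # The `cmd && rc=$? || rc=$?` pattern records the exit code of cmd without
  # letting a failure abort the script under `set -o errexit`.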
  local boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
  if [[ "${kubectl_rc}" != 0 ]]; then
    echo "== FAILED to get bootID for ${instance} =="
    echo "${boot_id}"
    return ${kubectl_rc}
  fi

  # Drain node
  echo "== Draining ${instance}. ==" >&2
  local drain_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
    && drain_rc=$? || drain_rc=$?
  if [[ "${drain_rc}" != 0 ]]; then
    echo "== FAILED to drain ${instance} =="
    return ${drain_rc}
  fi

  # Recreate instance
  echo "== Recreating instance ${instance}. ==" >&2
  local recreate_rc
  local recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
    --project="${PROJECT}" \
    --zone="${ZONE}" \
    --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
  if [[ "${recreate_rc}" != 0 ]]; then
    echo "== FAILED to recreate ${instance} =="
    echo "${recreate}"
    return ${recreate_rc}
  fi

  # Wait for the node status to reflect a new boot ID. This guarantees that
  # the node status in the API is from a different boot. It does not guarantee
  # that the status is from the upgraded node, but it is a best-effort
  # approximation.
  echo "== Waiting for new node to be added to k8s. ==" >&2
  while true; do
    local new_boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
    if [[ "${kubectl_rc}" != 0 ]]; then
      echo "== FAILED to get node ${instance} =="
      echo "${new_boot_id}"
      echo " (Will retry.)"
    elif [[ "${boot_id}" != "${new_boot_id}" ]]; then
      echo "Node ${instance} recreated."
      break
    else
      echo -n .
    fi
    sleep 1
  done

  # Wait for the node to have Ready=True.
  echo "== Waiting for ${instance} to become ready. ==" >&2
  while true; do
    local ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
    if [[ "${ready}" != 'True' ]]; then
      echo "Node ${instance} is still not ready: Ready=${ready}"
    else
      echo "Node ${instance} Ready=${ready}"
      break
    fi
    sleep 1
  done

  # Uncordon the node.
  echo "== Uncordon ${instance}. ==" >&2
  local uncordon_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" uncordon "${instance}" \
    && uncordon_rc=$? || uncordon_rc=$?
  if [[ "${uncordon_rc}" != 0 ]]; then
    echo "== FAILED to uncordon ${instance} =="
    return ${uncordon_rc}
  fi
}

# Prereqs:
# - prepare-node-upgrade should have been called successfully
function do-node-upgrade() {
  echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
  # Do the actual upgrade.
  # NOTE(zmerlynn): If you are changing this gcloud command, update
  # test/e2e/cluster_upgrade.go to match this EXACTLY.
  local template_name=$(get-template-name-from-version "${SANITIZED_VERSION}" "${NODE_INSTANCE_PREFIX}")
  local old_templates=()
  local updates=()
  for group in "${INSTANCE_GROUPS[@]}"; do
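    # Record the group's current template so it can be deleted after the
    # rollout completes.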
    old_templates+=($(gcloud compute instance-groups managed list \
      --project="${PROJECT}" \
      --filter="name ~ '${group}' AND zone:(${ZONE})" \
      --format='value(instanceTemplate)' || true))
    set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
      --template="${template_name}" \
      --project="${PROJECT}" \
      --zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
    if [[ "${set_instance_template_rc}" != 0 ]]; then
      echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
      echo "${set_instance_template_out}"
      return ${set_instance_template_rc}
    fi
    instances=()
    instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
      --format='value(instance)' \
      --project="${PROJECT}" \
      --zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
    if [[ "${list_instances_rc}" != 0 ]]; then
      echo "== FAILED to list instances in group ${group} =="
      echo "${instances[@]}"
      return ${list_instances_rc}
    fi

    process_count_left=${node_upgrade_parallelism}
    pids=()
    ret_code_sum=0  # Should stay 0 in the loop iff all parallel node upgrades succeed.
    for instance in "${instances[@]}"; do
      do-single-node-upgrade "${instance}" & pids+=("$!")

      # We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
      # so wait once we hit that many nodes. This isn't ideal, since one might take much
      # longer than the others, but it should help.
      process_count_left=$((process_count_left - 1))
      if [[ ${process_count_left} -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
        # Wait for each of the parallel node upgrades to finish.
        for pid in "${pids[@]}"; do
          wait "${pid}"
          ret_code_sum=$(( ret_code_sum + $? ))
        done
        # Return if at least one of the node upgrades failed.
        if [[ ${ret_code_sum} != 0 ]]; then
          echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
          return ${ret_code_sum}
        fi
        process_count_left=${node_upgrade_parallelism}
        # This batch's pids have all been reaped; start the next batch fresh
        # (waiting on an already-reaped pid would fail).
        pids=()
      fi
    done
  done

  # Remove the old templates.
  echo "== Deleting old templates in ${PROJECT}. ==" >&2
  for tmpl in "${old_templates[@]}"; do
    gcloud compute instance-templates delete \
      --quiet \
      --project="${PROJECT}" \
      "${tmpl}" || true
  done

  echo "== Finished upgrading nodes to ${KUBE_VERSION}. ==" >&2
}
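
# Default option values; overridden by the flags parsed below.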
master_upgrade=true
node_upgrade=true
node_prereqs=false
local_binaries=false
env_os_distro=false
node_upgrade_parallelism=1

while getopts ":MNPlcho" opt; do
  case ${opt} in
    M)
      node_upgrade=false
      ;;
    N)
      master_upgrade=false
      ;;
    P)
      node_prereqs=true
      ;;
    l)
      local_binaries=true
      ;;
    c)
      node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
      ;;
    o)
      env_os_distro=true
      ;;
    h)
      usage
      exit 0
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
  esac
done
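
# Discard the parsed flags so that $1 is the version argument (if any).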
shift $((OPTIND-1))

if [[ $# -gt 1 ]]; then
  echo "Error: Only one parameter (<version number or publication>) may be passed after the set of flags!" >&2
  usage
  exit 1
fi

if [[ $# -lt 1 ]] && [[ "${local_binaries}" == "false" ]]; then
  usage
  exit 1
fi

if [[ "${master_upgrade}" == "false" ]] && [[ "${node_upgrade}" == "false" ]]; then
  echo "Can't specify both -M and -N" >&2
  exit 1
fi

# Prompt if the etcd storage media type isn't set (unless etcd2 is in use),
# when doing a master upgrade.
if [[ -z "${STORAGE_MEDIA_TYPE:-}" ]] && [[ "${STORAGE_BACKEND:-}" != "etcd2" ]] && [[ "${master_upgrade}" == "true" ]]; then
  echo "The default etcd storage media type in 1.6 has changed from application/json to application/vnd.kubernetes.protobuf."
  echo "Documentation about the change can be found at https://kubernetes.io/docs/admin/etcd_upgrade."
  echo ""
  echo "ETCD2 DOES NOT SUPPORT PROTOBUF: If you wish to have the ability to downgrade to etcd2 later, application/json must be used."
  echo ""
  echo "It's HIGHLY recommended that etcd be backed up before this step!!"
  echo ""
  echo "To enable using json, before running this script set:"
  echo "export STORAGE_MEDIA_TYPE=application/json"
  echo ""
  if [ -t 0 ] && [ -t 1 ]; then
    read -r -p "Would you like to continue with the new default, and lose the ability to downgrade to etcd2? [y/N] " confirm
    if [[ "${confirm}" != "y" ]]; then
      exit 1
    fi
  else
    echo "To enable using protobuf, before running this script set:"
    echo "export STORAGE_MEDIA_TYPE=application/vnd.kubernetes.protobuf"
    echo ""
    echo "STORAGE_MEDIA_TYPE must be specified when run non-interactively." >&2
    exit 1
  fi
fi

# Prompt if etcd image/version is unspecified when doing master upgrade.
# In e2e tests, we use TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true to skip this
# prompt, simulating the behavior when the user confirms interactively.
# All other automated use of this script should explicitly specify a version.
if [[ "${master_upgrade}" == "true" ]]; then
  if [[ -z "${ETCD_IMAGE:-}" && -z "${TEST_ETCD_IMAGE:-}" ]] || [[ -z "${ETCD_VERSION:-}" && -z "${TEST_ETCD_VERSION:-}" ]]; then
    echo
    echo "***WARNING***"
    echo "Upgrading Kubernetes with this script might result in an upgrade to a new etcd version."
    echo "Some etcd version upgrades, such as 3.0.x to 3.1.x, DO NOT offer a downgrade path."
    echo "To pin the etcd version to your current one (e.g. v3.0.17), set the following variables"
    echo "before running this script:"
    echo
    echo "# example: pin to etcd v3.0.17"
    echo "export ETCD_IMAGE=3.0.17"
    echo "export ETCD_VERSION=3.0.17"
    echo
    echo "Alternatively, if you choose to allow an etcd upgrade that doesn't support downgrade,"
    echo "you might still be able to downgrade Kubernetes by pinning to the newer etcd version."
    echo "In all cases, it is strongly recommended to have an etcd backup before upgrading."
    echo
    if [ -t 0 ] && [ -t 1 ]; then
      read -r -p "Continue with default etcd version, which might upgrade etcd? [y/N] " confirm
      if [[ "${confirm}" != "y" ]]; then
        exit 1
      fi
    elif [[ "${TEST_ALLOW_IMPLICIT_ETCD_UPGRADE:-}" != "true" ]]; then
      echo "ETCD_IMAGE and ETCD_VERSION must be specified when run non-interactively." >&2
      exit 1
    fi
  fi
fi

print-node-version-info "Pre-Upgrade"

if [[ "${local_binaries}" == "false" ]]; then
  set_binary_version "${1}"
fi

prepare-upgrade

if [[ "${node_prereqs}" == "true" ]]; then
  prepare-node-upgrade
  exit 0
fi

if [[ "${master_upgrade}" == "true" ]]; then
  upgrade-master
fi

if [[ "${node_upgrade}" == "true" ]]; then
  if [[ "${local_binaries}" == "true" ]]; then
    echo "Upgrading nodes to local binaries is not yet supported." >&2
    exit 1
  else
    upgrade-nodes
  fi
fi

echo "== Validating cluster post-upgrade =="
"${KUBE_ROOT}/cluster/validate-cluster.sh"

print-node-version-info "Post-Upgrade"