upgrade.sh

#!/usr/bin/env bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# !!! EXPERIMENTAL !!! Upgrade script for GCE. Expect this to get
# rewritten in Go in relatively short order, but it allows us to start
# testing the concepts.

set -o errexit
set -o nounset
set -o pipefail
if [[ "${KUBERNETES_PROVIDER:-gce}" != "gce" ]]; then
  echo "!!! ${0} only works on GCE" >&2
  exit 1
fi

KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
source "${KUBE_ROOT}/hack/lib/util.sh"
source "${KUBE_ROOT}/cluster/kube-util.sh"
function usage() {
  echo "!!! EXPERIMENTAL !!!"
  echo "!!! This upgrade script is not meant to be run in production !!!"
  echo ""
  echo "${0} [-M | -N | -P] [-o] (-l | <version number or publication>)"
  echo "  Upgrades master and nodes by default"
  echo "  -M:  Upgrade master only"
  echo "  -N:  Upgrade nodes only"
  echo "  -P:  Node upgrade prerequisites only (create a new instance template)"
  echo "  -c:  Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are upgraded serially."
  echo "  -o:  Use the OS distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
  echo "  -l:  Use local (dev) binaries. This is only supported for master upgrades."
  echo ""
  echo '  Version number or publication is either a proper version number'
  echo '  (e.g. "v1.0.6", "v1.2.0-alpha.1.881+376438b69c7612") or a version'
  echo '  publication of the form <bucket>/<version> (e.g. "release/stable",'
  echo '  "ci/latest-1"). Some common ones are:'
  echo '    - "release/stable"'
  echo '    - "release/latest"'
  echo '    - "ci/latest"'
  echo '  See the docs on getting builds for more information about version publication.'
  echo ""
  echo "(... Fetching current release versions ...)"
  echo ""

  # NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE test/e2e/cluster_upgrade.go
  local release_stable
  local release_latest
  local ci_latest

  release_stable=$(gsutil cat gs://kubernetes-release/release/stable.txt)
  release_latest=$(gsutil cat gs://kubernetes-release/release/latest.txt)
  ci_latest=$(gsutil cat gs://kubernetes-release-dev/ci/latest.txt)

  echo "Right now, versions are as follows:"
  echo "  release/stable: ${0} ${release_stable}"
  echo "  release/latest: ${0} ${release_latest}"
  echo "  ci/latest:      ${0} ${ci_latest}"
}
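
# Illustrative invocations, based on the usage text above (the version values
# are examples only):
#   ./upgrade.sh release/stable     # upgrade master and nodes to the current stable release
#   ./upgrade.sh -M v1.0.6          # upgrade only the master to v1.0.6
#   ./upgrade.sh -N -c ci/latest    # upgrade only the nodes, NODE_UPGRADE_PARALLELISM at a time per MIG
#   ./upgrade.sh -M -l              # upgrade only the master using local (dev) binaries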
function print-node-version-info() {
  echo "== $1 Node OS and Kubelet Versions =="
  "${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o=jsonpath='{range .items[*]}name: "{.metadata.name}", osImage: "{.status.nodeInfo.osImage}", kubeletVersion: "{.status.nodeInfo.kubeletVersion}"{"\n"}{end}'
}

function upgrade-master() {
  local num_masters
  num_masters=$(get-master-replicas-count)
  if [[ "${num_masters}" -gt 1 ]]; then
    echo "Master upgrade is not supported when more than one master replica is present. Current number of master replicas: ${num_masters}"
    exit 1
  fi
  echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. =="

  # Tries to figure out KUBE_USER/KUBE_PASSWORD by first looking under
  # kubeconfig:username, and then under kubeconfig:username-basic-auth.
  # TODO: KUBE_USER is used in generating ABAC policy which the
  # apiserver may not have enabled. If it's enabled, we must have a user
  # to generate a valid ABAC policy. If the username changes, should
  # the script fail? Should we generate a default username and password
  # if the section is missing in kubeconfig? Handle this better in 1.5.
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  detect-master
  parse-master-env
  upgrade-master-env

  # Delete the master instance. Note that the master-pd is created
  # with auto-delete=no, so it should not be deleted.
  gcloud compute instances delete \
    --project "${PROJECT}" \
    --quiet \
    --zone "${ZONE}" \
    "${MASTER_NAME}"

  create-master-instance "${MASTER_NAME}-ip"
  wait-for-master
}
function upgrade-master-env() {
  echo "== Upgrading master environment variables. =="
  # Generate the node problem detector token if it isn't present on the original
  # master.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
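    # Generate a random token: 128 random bytes, base64-encoded, with the
    # non-alphanumeric base64 characters (=+/) removed, truncated to 32 characters.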
    NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  fi
}

function wait-for-master() {
  echo "== Waiting for new master to respond to API requests =="

  local curl_auth_arg
  if [[ -n ${KUBE_BEARER_TOKEN:-} ]]; then
    curl_auth_arg=(-H "Authorization: Bearer ${KUBE_BEARER_TOKEN}")
  elif [[ -n ${KUBE_PASSWORD:-} ]]; then
    curl_auth_arg=(--user "${KUBE_USER}:${KUBE_PASSWORD}")
  else
    echo "can't get auth credentials for the current master"
    exit 1
  fi
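
  # Poll the master's /healthz endpoint over HTTPS (skipping certificate
  # verification) until it responds successfully.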
  until curl --insecure "${curl_auth_arg[@]}" --max-time 5 \
    --fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/healthz"; do
    printf "."
    sleep 2
  done

  echo "== Done =="
}

# Perform common upgrade setup tasks
#
# Assumed vars
#   KUBE_VERSION
function prepare-upgrade() {
  kube::util::ensure-temp-dir
  detect-project
  detect-subnetworks
  detect-node-names # sets INSTANCE_GROUPS
  write-cluster-location
  write-cluster-name
  tars_from_version
}
# Reads kube-env metadata from first node in NODE_NAMES.
#
# Assumed vars:
#   NODE_NAMES
#   PROJECT
#   ZONE
function get-node-env() {
  # TODO(zmerlynn): Make this more reliable with retries.
  gcloud compute --project ${PROJECT} ssh --zone ${ZONE} ${NODE_NAMES[0]} --command \
    "curl --fail --silent -H 'Metadata-Flavor: Google' \
      'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
}
# Reads the OS distro information from /etc/os-release on the node.
# $1: The name of the node
#
# Assumed vars:
#   PROJECT
#   ZONE
function get-node-os() {
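  # Prints the ID field from /etc/os-release (e.g. "cos" or "debian");
  # `cut -c 4-` strips the leading "ID=".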
  gcloud compute ssh "$1" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --command \
    "cat /etc/os-release | grep \"^ID=.*\" | cut -c 4-"
}
# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function upgrade-nodes() {
  prepare-node-upgrade
  do-node-upgrade
}
function setup-base-image() {
  if [[ "${env_os_distro}" == "false" ]]; then
    echo "== Ensuring that the new node base OS image matches the existing node base OS image =="
    NODE_OS_DISTRIBUTION=$(get-node-os "${NODE_NAMES[0]}")
    if [[ "${NODE_OS_DISTRIBUTION}" == "cos" ]]; then
      NODE_OS_DISTRIBUTION="gci"
    fi
    source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
    # Reset the node image based on the current OS distro.
    set-linux-node-image
  fi
}
# prepare-node-upgrade creates a new instance template suitable for upgrading
# to KUBE_VERSION and echoes a single line with the name of the new template.
#
# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   SANITIZED_VERSION
#   INSTANCE_GROUPS
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function prepare-node-upgrade() {
  echo "== Preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  setup-base-image
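
  # GCE resource names cannot contain '.' or '+', so map them to '-' before
  # embedding the version in the new instance template name.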
  SANITIZED_VERSION=$(echo ${KUBE_VERSION} | sed 's/[\.\+]/-/g')

  # TODO(zmerlynn): Refactor setting scope flags.
  local scope_flags=
  if [ -n "${NODE_SCOPES}" ]; then
    scope_flags="--scopes ${NODE_SCOPES}"
  else
    scope_flags="--no-scopes"
  fi

  # Get required node env vars from the existing template.
  local node_env=$(get-node-env)
  KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
  NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${node_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
  EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
  KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
  KUBELET_KEY_BASE64=$(get-env-val "${node_env}" "KUBELET_KEY")

  upgrade-node-env

  # TODO(zmerlynn): How do we ensure kube-env is written in a ${version}-
  # compatible way?
  write-linux-node-env

  # TODO(zmerlynn): Get configure-vm script from ${version}. (Must plumb this
  # through all create-linux-node-instance-template implementations).
  local template_name=$(get-template-name-from-version ${SANITIZED_VERSION} ${NODE_INSTANCE_PREFIX})
  create-linux-node-instance-template "${template_name}"
  # The following is echoed so that callers can get the template name.
  echo "Instance template name: ${template_name}"
  echo "== Finished preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
}
function upgrade-node-env() {
  echo "== Upgrading node environment variables. =="
  # Get the node problem detector token from master if it isn't present on
  # the original node.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
    detect-master
    local master_env=$(get-master-env)
    NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  fi
}
# Upgrades a single node.
# $1: The name of the node
#
# Note: This is called multiple times from do-node-upgrade() in parallel, so should be thread-safe.
function do-single-node-upgrade() {
  local -r instance="$1"
  local kubectl_rc
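  # The `cmd && rc=$? || rc=$?` pattern used below records the command's exit
  # status without letting `set -o errexit` abort the script on failure.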
  local boot_id
  boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
  if [[ "${kubectl_rc}" != 0 ]]; then
    echo "== FAILED to get bootID of ${instance} =="
    echo "${boot_id}"
    return ${kubectl_rc}
  fi

  # Drain node
  echo "== Draining ${instance}. ==" >&2
  local drain_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
    && drain_rc=$? || drain_rc=$?
  if [[ "${drain_rc}" != 0 ]]; then
    echo "== FAILED to drain ${instance} =="
    return ${drain_rc}
  fi

  # Recreate instance
  echo "== Recreating instance ${instance}. ==" >&2
  local recreate_rc
  local recreate
  recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
    --project="${PROJECT}" \
    --zone="${ZONE}" \
    --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
  if [[ "${recreate_rc}" != 0 ]]; then
    echo "== FAILED to recreate ${instance} =="
    echo "${recreate}"
    return ${recreate_rc}
  fi

  # Wait for node status to reflect a new boot ID. This guarantees us
  # that the node status in the API is from a different boot. This
  # does not guarantee that the status is from the upgraded node, but
  # it is a best effort approximation.
  echo "== Waiting for new node to be added to k8s. ==" >&2
  while true; do
    local new_boot_id
    new_boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
    if [[ "${kubectl_rc}" != 0 ]]; then
      echo "== FAILED to get node ${instance} =="
      echo "${new_boot_id}"
      echo "  (Will retry.)"
    elif [[ "${boot_id}" != "${new_boot_id}" ]]; then
      echo "Node ${instance} recreated."
      break
    else
      echo -n .
    fi
    sleep 1
  done

  # Wait for the node to have Ready=True.
  echo "== Waiting for ${instance} to become ready. ==" >&2
  while true; do
    local ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
    if [[ "${ready}" != 'True' ]]; then
      echo "Node ${instance} is still not ready: Ready=${ready}"
    else
      echo "Node ${instance} Ready=${ready}"
      break
    fi
    sleep 1
  done

  # Uncordon the node.
  echo "== Uncordon ${instance}. ==" >&2
  local uncordon_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" uncordon "${instance}" \
    && uncordon_rc=$? || uncordon_rc=$?
  if [[ "${uncordon_rc}" != 0 ]]; then
    echo "== FAILED to uncordon ${instance} =="
    return ${uncordon_rc}
  fi
}
# Prereqs:
# - prepare-node-upgrade should have been called successfully
function do-node-upgrade() {
  echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
  # Do the actual upgrade.
  # NOTE(zmerlynn): If you are changing this gcloud command, update
  # test/e2e/cluster_upgrade.go to match this EXACTLY.
  local template_name=$(get-template-name-from-version ${SANITIZED_VERSION} ${NODE_INSTANCE_PREFIX})
  local old_templates=()
  local updates=()
  for group in ${INSTANCE_GROUPS[@]}; do
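    # Record the instance template currently attached to this group so it can
    # be deleted once the roll to the new template has finished.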
    old_templates+=($(gcloud compute instance-groups managed list \
      --project="${PROJECT}" \
      --filter="name ~ '${group}' AND zone:(${ZONE})" \
      --format='value(instanceTemplate)' || true))
    set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
      --template="${template_name}" \
      --project="${PROJECT}" \
      --zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
    if [[ "${set_instance_template_rc}" != 0 ]]; then
      echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
      echo "${set_instance_template_out}"
      return ${set_instance_template_rc}
    fi
    instances=()
    instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
      --format='value(instance)' \
      --project="${PROJECT}" \
      --zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
    if [[ "${list_instances_rc}" != 0 ]]; then
      echo "== FAILED to list instances in group ${group} =="
      echo "${instances[@]}"
      return ${list_instances_rc}
    fi

    process_count_left=${node_upgrade_parallelism}
    pids=()
    ret_code_sum=0 # Should stay 0 in the loop iff all parallel node upgrades succeed.
    for instance in ${instances[@]}; do
      do-single-node-upgrade "${instance}" & pids+=("$!")

      # We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
      # so wait once we hit that many nodes. This isn't ideal, since one might take much
      # longer than the others, but it should help.
      process_count_left=$((process_count_left - 1))
      if [[ "${process_count_left}" -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
        # Wait for each of the parallel node upgrades to finish.
        for pid in "${pids[@]}"; do
          wait "${pid}" && wait_rc=$? || wait_rc=$?
          ret_code_sum=$(( ret_code_sum + wait_rc ))
        done
        # Return if at least one of the node upgrades failed.
        if [[ ${ret_code_sum} != 0 ]]; then
          echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
          return ${ret_code_sum}
        fi
        process_count_left=${node_upgrade_parallelism}
        pids=()
      fi
    done
  done
  # Remove the old templates.
  echo "== Deleting old templates in ${PROJECT}. ==" >&2
  for tmpl in ${old_templates[@]}; do
    gcloud compute instance-templates delete \
      --quiet \
      --project="${PROJECT}" \
      "${tmpl}" || true
  done

  echo "== Finished upgrading nodes to ${KUBE_VERSION}. ==" >&2
}
function update-coredns-config() {
  # Get the current CoreDNS version
  local -r coredns_addon_path="/etc/kubernetes/addons/0-dns/coredns"
  local -r tmpdir=/tmp
  local -r download_dir=$(mktemp --tmpdir=${tmpdir} -d coredns-migration.XXXXXXXXXX) || exit 1

  # clean up
  cleanup() {
    rm -rf "${download_dir}"
  }
  trap cleanup EXIT

  # Get the new installed CoreDNS version
  echo "Waiting for CoreDNS to update"
  until [[ $(${KUBE_ROOT}/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.metadata.resourceVersion}') -ne ${COREDNS_DEPLOY_RESOURCE_VERSION} ]]; do
    sleep 1
  done
  echo "Fetching the latest installed CoreDNS version"
  NEW_COREDNS_VERSION=$(${KUBE_ROOT}/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.spec.template.spec.containers[:1].image}' | cut -d ":" -f 2)
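
  # Map the machine architecture reported by `uname -m` onto the corefile-tool
  # release artifact suffix and its expected sha256 checksum.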
  case "$(uname -m)" in
    x86_64*)
      host_arch=amd64
      corefile_tool_SHA="8019665ef9e7d40e2cd468d0d3e72d641584a2e33d2d436135919ff576054257"
      ;;
    i?86_64*)
      host_arch=amd64
      corefile_tool_SHA="8019665ef9e7d40e2cd468d0d3e72d641584a2e33d2d436135919ff576054257"
      ;;
    amd64*)
      host_arch=amd64
      corefile_tool_SHA="8019665ef9e7d40e2cd468d0d3e72d641584a2e33d2d436135919ff576054257"
      ;;
    aarch64*)
      host_arch=arm64
      corefile_tool_SHA="923fcf6e0d1ccb85bdab9b0a732e5a7d27d1346ddff2cdde205e31f126e6c491"
      ;;
    arm64*)
      host_arch=arm64
      corefile_tool_SHA="923fcf6e0d1ccb85bdab9b0a732e5a7d27d1346ddff2cdde205e31f126e6c491"
      ;;
    arm*)
      host_arch=arm
      corefile_tool_SHA="caf026a50ba0284b96d4125d9f7d18ff867860031ea9124322d88f6baf9f6a48"
      ;;
    s390x*)
      host_arch=s390x
      corefile_tool_SHA="119d25b44a54deec7e1af4c54af01eb3609c376301e586888cfc1f56951729f9"
      ;;
    ppc64le*)
      host_arch=ppc64le
      corefile_tool_SHA="8e873c3363d09d73fe34699981b79d977613c19da3d8d2473582ca24368abefd"
      ;;
    *)
      echo "Unsupported host arch. Must be x86_64, 386, arm, arm64, s390x or ppc64le." >&2
      exit 1
      ;;
  esac
  # Download the CoreDNS migration tool
  echo "== Downloading the CoreDNS migration tool =="
  wget -P ${download_dir} "https://github.com/coredns/corefile-migration/releases/download/v1.0.4/corefile-tool-${host_arch}" >/dev/null 2>&1

  local -r checkSHA=$(sha256sum ${download_dir}/corefile-tool-${host_arch} | cut -d " " -f 1)
  if [[ "${checkSHA}" != "${corefile_tool_SHA}" ]]; then
    echo "!!! CheckSum for the CoreDNS migration tool did not match !!!" >&2
    exit 1
  fi
  chmod +x ${download_dir}/corefile-tool-${host_arch}

  # Migrate the CoreDNS ConfigMap depending on whether it is being downgraded or upgraded.
  ${KUBE_ROOT}/cluster/kubectl.sh -n kube-system get cm coredns -o jsonpath='{.data.Corefile}' > ${download_dir}/Corefile-old
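  # `sort -V | head -n 1` yields the lower of the two versions; if that is not
  # the new version, the new version is higher and this is an upgrade.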
  if test "$(printf '%s\n' "${CURRENT_COREDNS_VERSION}" "${NEW_COREDNS_VERSION}" | sort -V | head -n 1)" != "${NEW_COREDNS_VERSION}"; then
    echo "== Upgrading the CoreDNS ConfigMap =="
    ${download_dir}/corefile-tool-${host_arch} migrate --from ${CURRENT_COREDNS_VERSION} --to ${NEW_COREDNS_VERSION} --corefile ${download_dir}/Corefile-old > ${download_dir}/Corefile
    ${KUBE_ROOT}/cluster/kubectl.sh -n kube-system create configmap coredns --from-file ${download_dir}/Corefile -o yaml --dry-run | ${KUBE_ROOT}/cluster/kubectl.sh apply -f -
  else
    # In case of a downgrade, a custom CoreDNS Corefile will be overwritten by a default Corefile. In that case,
    # the user will need to manually modify the resulting (default) Corefile after the downgrade is complete.
    echo "== Applying the latest default CoreDNS configuration =="
    gcloud compute --project ${PROJECT} scp --zone ${ZONE} ${MASTER_NAME}:${coredns_addon_path}/coredns.yaml ${download_dir}/coredns-manifest.yaml > /dev/null
    ${KUBE_ROOT}/cluster/kubectl.sh apply -f ${download_dir}/coredns-manifest.yaml
  fi

  echo "== The CoreDNS Config has been updated =="
}
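
# Record the CoreDNS version and Deployment resourceVersion that are live before
# the upgrade; update-coredns-config compares against these to detect when the
# new CoreDNS has rolled out and which migration to run.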
echo "Fetching the previously installed CoreDNS version"
CURRENT_COREDNS_VERSION=$(${KUBE_ROOT}/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.spec.template.spec.containers[:1].image}' | cut -d ":" -f 2)
COREDNS_DEPLOY_RESOURCE_VERSION=$(${KUBE_ROOT}/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.metadata.resourceVersion}')

master_upgrade=true
node_upgrade=true
node_prereqs=false
local_binaries=false
env_os_distro=false
node_upgrade_parallelism=1
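
# Flag parsing; see usage() above. Note that -c takes its parallelism value
# from the NODE_UPGRADE_PARALLELISM environment variable, not an option argument.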
while getopts ":MNPlcho" opt; do
  case ${opt} in
    M)
      node_upgrade=false
      ;;
    N)
      master_upgrade=false
      ;;
    P)
      node_prereqs=true
      ;;
    l)
      local_binaries=true
      ;;
    c)
      node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
      ;;
    o)
      env_os_distro=true
      ;;
    h)
      usage
      exit 0
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))
if [[ $# -gt 1 ]]; then
  echo "Error: Only one parameter (<version number or publication>) may be passed after the set of flags!" >&2
  usage
  exit 1
fi

if [[ $# -lt 1 ]] && [[ "${local_binaries}" == "false" ]]; then
  usage
  exit 1
fi

if [[ "${master_upgrade}" == "false" ]] && [[ "${node_upgrade}" == "false" ]]; then
  echo "Can't specify both -M and -N" >&2
  exit 1
fi

# When doing a master upgrade, prompt if the etcd storage media type isn't set
# (unless etcd2 is the storage backend).
if [[ -z "${STORAGE_MEDIA_TYPE:-}" ]] && [[ "${STORAGE_BACKEND:-}" != "etcd2" ]] && [[ "${master_upgrade}" == "true" ]]; then
  echo "The default etcd storage media type in 1.6 has changed from application/json to application/vnd.kubernetes.protobuf."
  echo "Documentation about the change can be found at https://kubernetes.io/docs/admin/etcd_upgrade."
  echo ""
  echo "ETCD2 DOES NOT SUPPORT PROTOBUF: If you wish to have the ability to downgrade to etcd2 later, application/json must be used."
  echo ""
  echo "It's HIGHLY recommended that etcd be backed up before this step!!"
  echo ""
  echo "To enable using json, before running this script set:"
  echo "export STORAGE_MEDIA_TYPE=application/json"
  echo ""
  if [ -t 0 ] && [ -t 1 ]; then
    read -p "Would you like to continue with the new default, and lose the ability to downgrade to etcd2? [y/N] " confirm
    if [[ "${confirm}" != "y" ]]; then
      exit 1
    fi
  else
    echo "To enable using protobuf, before running this script set:"
    echo "export STORAGE_MEDIA_TYPE=application/vnd.kubernetes.protobuf"
    echo ""
    echo "STORAGE_MEDIA_TYPE must be specified when run non-interactively." >&2
    exit 1
  fi
fi
# Prompt if etcd image/version is unspecified when doing master upgrade.
# In e2e tests, we use TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true to skip this
# prompt, simulating the behavior when the user confirms interactively.
# All other automated use of this script should explicitly specify a version.
if [[ "${master_upgrade}" == "true" ]]; then
  if [[ -z "${ETCD_IMAGE:-}" && -z "${TEST_ETCD_IMAGE:-}" ]] || [[ -z "${ETCD_VERSION:-}" && -z "${TEST_ETCD_VERSION:-}" ]]; then
    echo
    echo "***WARNING***"
    echo "Upgrading Kubernetes with this script might result in an upgrade to a new etcd version."
    echo "Some etcd version upgrades, such as 3.0.x to 3.1.x, DO NOT offer a downgrade path."
    echo "To pin the etcd version to your current one (e.g. v3.0.17), set the following variables"
    echo "before running this script:"
    echo
    echo "# example: pin to etcd v3.0.17"
    echo "export ETCD_IMAGE=3.0.17"
    echo "export ETCD_VERSION=3.0.17"
    echo
    echo "Alternatively, if you choose to allow an etcd upgrade that doesn't support downgrade,"
    echo "you might still be able to downgrade Kubernetes by pinning to the newer etcd version."
    echo "In all cases, it is strongly recommended to have an etcd backup before upgrading."
    echo
    if [ -t 0 ] && [ -t 1 ]; then
      read -p "Continue with default etcd version, which might upgrade etcd? [y/N] " confirm
      if [[ "${confirm}" != "y" ]]; then
        exit 1
      fi
    elif [[ "${TEST_ALLOW_IMPLICIT_ETCD_UPGRADE:-}" != "true" ]]; then
      echo "ETCD_IMAGE and ETCD_VERSION must be specified when run non-interactively." >&2
      exit 1
    fi
  fi
fi
print-node-version-info "Pre-Upgrade"

if [[ "${local_binaries}" == "false" ]]; then
  set_binary_version ${1}
fi

prepare-upgrade

if [[ "${node_prereqs}" == "true" ]]; then
  prepare-node-upgrade
  exit 0
fi

if [[ "${master_upgrade}" == "true" ]]; then
  upgrade-master
fi

if [[ "${node_upgrade}" == "true" ]]; then
  if [[ "${local_binaries}" == "true" ]]; then
    echo "Upgrading nodes to local binaries is not yet supported." >&2
    exit 1
  else
    upgrade-nodes
  fi
fi

if [[ "${CLUSTER_DNS_CORE_DNS:-}" == "true" ]]; then
  update-coredns-config
fi

echo "== Validating cluster post-upgrade =="
"${KUBE_ROOT}/cluster/validate-cluster.sh"

print-node-version-info "Post-Upgrade"