staticpods.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package upgrade
  14. import (
  15. "fmt"
  16. "os"
  17. "path/filepath"
  18. "strings"
  19. "time"
  20. "github.com/pkg/errors"
  21. utilerrors "k8s.io/apimachinery/pkg/util/errors"
  22. "k8s.io/apimachinery/pkg/util/version"
  23. clientset "k8s.io/client-go/kubernetes"
  24. "k8s.io/klog"
  25. kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
  26. "k8s.io/kubernetes/cmd/kubeadm/app/constants"
  27. certsphase "k8s.io/kubernetes/cmd/kubeadm/app/phases/certs"
  28. "k8s.io/kubernetes/cmd/kubeadm/app/phases/certs/renewal"
  29. "k8s.io/kubernetes/cmd/kubeadm/app/phases/controlplane"
  30. etcdphase "k8s.io/kubernetes/cmd/kubeadm/app/phases/etcd"
  31. kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util"
  32. "k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
  33. dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun"
  34. etcdutil "k8s.io/kubernetes/cmd/kubeadm/app/util/etcd"
  35. "k8s.io/kubernetes/cmd/kubeadm/app/util/staticpod"
  36. )
  // UpgradeManifestTimeout is the timeout for upgrading a static pod manifest.
  const UpgradeManifestTimeout = 5 * time.Minute
  // StaticPodPathManager tracks the directories and manifest paths involved in
  // a static pod upgrade: the live manifests read by the kubelet, the freshly
  // generated ones, and the backups taken during the transition.
  type StaticPodPathManager interface {
  	// MoveFile moves a file from oldPath to newPath.
  	MoveFile(oldPath, newPath string) error

  	// KubernetesDir is the directory Kubernetes owns for storing various
  	// configuration files.
  	KubernetesDir() string

  	// KustomizeDir is the folder holding the kustomize patches for static
  	// pod manifests.
  	KustomizeDir() string

  	// RealManifestPath is the path of the component's manifest inside the
  	// "real" static pod manifest directory used by the kubelet.
  	RealManifestPath(component string) string

  	// RealManifestDir is the static pod manifest directory used by the kubelet.
  	RealManifestDir() string

  	// TempManifestPath is the path of the component's manifest inside the
  	// temporary directory where upgraded manifests are generated.
  	TempManifestPath(component string) string

  	// TempManifestDir is the temporary directory where upgraded manifests
  	// are generated.
  	TempManifestDir() string

  	// BackupManifestPath is the path of the component's manifest inside the
  	// directory used for backing up manifests during the transition.
  	BackupManifestPath(component string) string

  	// BackupManifestDir is the directory used for backing up manifests
  	// during the transition.
  	BackupManifestDir() string

  	// BackupEtcdDir is the directory used for backing up the etcd data
  	// during the transition.
  	BackupEtcdDir() string

  	// CleanupDirs cleans up all temporary directories.
  	CleanupDirs() error
  }
  // KubeStaticPodPathManager is the real StaticPodPathManager implementation,
  // used when upgrading a static pod cluster.
  type KubeStaticPodPathManager struct {
  	// Configuration directories.
  	kubernetesDir string
  	kustomizeDir  string

  	// Manifest directories: the one read by the kubelet, the one holding the
  	// newly generated manifests, and the one holding the backups.
  	realManifestDir   string
  	tempManifestDir   string
  	backupManifestDir string

  	// Directory holding the etcd backup.
  	backupEtcdDir string

  	// When set, CleanupDirs leaves the corresponding backup directory in place.
  	keepManifestDir bool
  	keepEtcdDir     bool
  }
  77. // NewKubeStaticPodPathManager creates a new instance of KubeStaticPodPathManager
  78. func NewKubeStaticPodPathManager(kubernetesDir, kustomizeDir, tempDir, backupDir, backupEtcdDir string, keepManifestDir, keepEtcdDir bool) StaticPodPathManager {
  79. return &KubeStaticPodPathManager{
  80. kubernetesDir: kubernetesDir,
  81. kustomizeDir: kustomizeDir,
  82. realManifestDir: filepath.Join(kubernetesDir, constants.ManifestsSubDirName),
  83. tempManifestDir: tempDir,
  84. backupManifestDir: backupDir,
  85. backupEtcdDir: backupEtcdDir,
  86. keepManifestDir: keepManifestDir,
  87. keepEtcdDir: keepEtcdDir,
  88. }
  89. }
  90. // NewKubeStaticPodPathManagerUsingTempDirs creates a new instance of KubeStaticPodPathManager with temporary directories backing it
  91. func NewKubeStaticPodPathManagerUsingTempDirs(kubernetesDir, kustomizeDir string, saveManifestsDir, saveEtcdDir bool) (StaticPodPathManager, error) {
  92. upgradedManifestsDir, err := constants.CreateTempDirForKubeadm(kubernetesDir, "kubeadm-upgraded-manifests")
  93. if err != nil {
  94. return nil, err
  95. }
  96. backupManifestsDir, err := constants.CreateTimestampDirForKubeadm(kubernetesDir, "kubeadm-backup-manifests")
  97. if err != nil {
  98. return nil, err
  99. }
  100. backupEtcdDir, err := constants.CreateTimestampDirForKubeadm(kubernetesDir, "kubeadm-backup-etcd")
  101. if err != nil {
  102. return nil, err
  103. }
  104. return NewKubeStaticPodPathManager(kubernetesDir, kustomizeDir, upgradedManifestsDir, backupManifestsDir, backupEtcdDir, saveManifestsDir, saveEtcdDir), nil
  105. }
  106. // MoveFile should move a file from oldPath to newPath
  107. func (spm *KubeStaticPodPathManager) MoveFile(oldPath, newPath string) error {
  108. return os.Rename(oldPath, newPath)
  109. }
  110. // KubernetesDir should point to the directory Kubernetes owns for storing various configuration files
  111. func (spm *KubeStaticPodPathManager) KubernetesDir() string {
  112. return spm.kubernetesDir
  113. }
  114. // KustomizeDir should point to the folder where kustomize patches for static pod manifest are stored
  115. func (spm *KubeStaticPodPathManager) KustomizeDir() string {
  116. return spm.kustomizeDir
  117. }
  118. // RealManifestPath gets the file path for the component in the "real" static pod manifest directory used by the kubelet
  119. func (spm *KubeStaticPodPathManager) RealManifestPath(component string) string {
  120. return constants.GetStaticPodFilepath(component, spm.realManifestDir)
  121. }
  122. // RealManifestDir should point to the static pod manifest directory used by the kubelet
  123. func (spm *KubeStaticPodPathManager) RealManifestDir() string {
  124. return spm.realManifestDir
  125. }
  126. // TempManifestPath gets the file path for the component in the temporary directory created for generating new manifests for the upgrade
  127. func (spm *KubeStaticPodPathManager) TempManifestPath(component string) string {
  128. return constants.GetStaticPodFilepath(component, spm.tempManifestDir)
  129. }
  130. // TempManifestDir should point to the temporary directory created for generating new manifests for the upgrade
  131. func (spm *KubeStaticPodPathManager) TempManifestDir() string {
  132. return spm.tempManifestDir
  133. }
  134. // BackupManifestPath gets the file path for the component in the backup directory used for backuping manifests during the transition
  135. func (spm *KubeStaticPodPathManager) BackupManifestPath(component string) string {
  136. return constants.GetStaticPodFilepath(component, spm.backupManifestDir)
  137. }
  138. // BackupManifestDir should point to the backup directory used for backuping manifests during the transition
  139. func (spm *KubeStaticPodPathManager) BackupManifestDir() string {
  140. return spm.backupManifestDir
  141. }
  142. // BackupEtcdDir should point to the backup directory used for backuping manifests during the transition
  143. func (spm *KubeStaticPodPathManager) BackupEtcdDir() string {
  144. return spm.backupEtcdDir
  145. }
  146. // CleanupDirs cleans up all temporary directories except those the user has requested to keep around
  147. func (spm *KubeStaticPodPathManager) CleanupDirs() error {
  148. var errlist []error
  149. if err := os.RemoveAll(spm.TempManifestDir()); err != nil {
  150. errlist = append(errlist, err)
  151. }
  152. if !spm.keepManifestDir {
  153. if err := os.RemoveAll(spm.BackupManifestDir()); err != nil {
  154. errlist = append(errlist, err)
  155. }
  156. }
  157. if !spm.keepEtcdDir {
  158. if err := os.RemoveAll(spm.BackupEtcdDir()); err != nil {
  159. errlist = append(errlist, err)
  160. }
  161. }
  162. return utilerrors.NewAggregate(errlist)
  163. }
  164. func upgradeComponent(component string, certsRenewMgr *renewal.Manager, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, beforePodHash string, recoverManifests map[string]string) error {
  165. // Special treatment is required for etcd case, when rollbackOldManifests should roll back etcd
  166. // manifests only for the case when component is Etcd
  167. recoverEtcd := false
  168. if component == constants.Etcd {
  169. recoverEtcd = true
  170. }
  171. fmt.Printf("[upgrade/staticpods] Preparing for %q upgrade\n", component)
  172. // The old manifest is here; in the /etc/kubernetes/manifests/
  173. currentManifestPath := pathMgr.RealManifestPath(component)
  174. // The new, upgraded manifest will be written here
  175. newManifestPath := pathMgr.TempManifestPath(component)
  176. // The old manifest will be moved here; into a subfolder of the temporary directory
  177. // If a rollback is needed, these manifests will be put back to where they where initially
  178. backupManifestPath := pathMgr.BackupManifestPath(component)
  179. // Store the backup path in the recover list. If something goes wrong now, this component will be rolled back.
  180. recoverManifests[component] = backupManifestPath
  181. // Skip upgrade if current and new manifests are equal
  182. equal, err := staticpod.ManifestFilesAreEqual(currentManifestPath, newManifestPath)
  183. if err != nil {
  184. return err
  185. }
  186. if equal {
  187. fmt.Printf("[upgrade/staticpods] Current and new manifests of %s are equal, skipping upgrade\n", component)
  188. return nil
  189. }
  190. // if certificate renewal should be performed
  191. if certsRenewMgr != nil {
  192. // renew all the certificates used by the current component
  193. if err := renewCertsByComponent(cfg, component, certsRenewMgr); err != nil {
  194. return rollbackOldManifests(recoverManifests, errors.Wrapf(err, "failed to renew certificates for component %q", component), pathMgr, recoverEtcd)
  195. }
  196. }
  197. // Move the old manifest into the old-manifests directory
  198. if err := pathMgr.MoveFile(currentManifestPath, backupManifestPath); err != nil {
  199. return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
  200. }
  201. // Move the new manifest into the manifests directory
  202. if err := pathMgr.MoveFile(newManifestPath, currentManifestPath); err != nil {
  203. return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
  204. }
  205. fmt.Printf("[upgrade/staticpods] Moved new manifest to %q and backed up old manifest to %q\n", currentManifestPath, backupManifestPath)
  206. fmt.Println("[upgrade/staticpods] Waiting for the kubelet to restart the component")
  207. fmt.Printf("[upgrade/staticpods] This might take a minute or longer depending on the component/version gap (timeout %v)\n", UpgradeManifestTimeout)
  208. // Wait for the mirror Pod hash to change; otherwise we'll run into race conditions here when the kubelet hasn't had time to
  209. // notice the removal of the Static Pod, leading to a false positive below where we check that the API endpoint is healthy
  210. // If we don't do this, there is a case where we remove the Static Pod manifest, kubelet is slow to react, kubeadm checks the
  211. // API endpoint below of the OLD Static Pod component and proceeds quickly enough, which might lead to unexpected results.
  212. if err := waiter.WaitForStaticPodHashChange(cfg.NodeRegistration.Name, component, beforePodHash); err != nil {
  213. return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
  214. }
  215. // Wait for the static pod component to come up and register itself as a mirror pod
  216. if err := waiter.WaitForPodsWithLabel("component=" + component); err != nil {
  217. return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
  218. }
  219. fmt.Printf("[upgrade/staticpods] Component %q upgraded successfully!\n", component)
  220. return nil
  221. }
  222. // performEtcdStaticPodUpgrade performs upgrade of etcd, it returns bool which indicates fatal error or not and the actual error.
  223. func performEtcdStaticPodUpgrade(certsRenewMgr *renewal.Manager, client clientset.Interface, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, recoverManifests map[string]string, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) (bool, error) {
  224. // Add etcd static pod spec only if external etcd is not configured
  225. if cfg.Etcd.External != nil {
  226. return false, errors.New("external etcd detected, won't try to change any etcd state")
  227. }
  228. // Checking health state of etcd before proceeding with the upgrade
  229. err := oldEtcdClient.CheckClusterHealth()
  230. if err != nil {
  231. return true, errors.Wrap(err, "etcd cluster is not healthy")
  232. }
  233. // Backing up etcd data store
  234. backupEtcdDir := pathMgr.BackupEtcdDir()
  235. runningEtcdDir := cfg.Etcd.Local.DataDir
  236. if err := kubeadmutil.CopyDir(runningEtcdDir, backupEtcdDir); err != nil {
  237. return true, errors.Wrap(err, "failed to back up etcd data")
  238. }
  239. // Need to check currently used version and version from constants, if differs then upgrade
  240. desiredEtcdVersion, warning, err := constants.EtcdSupportedVersion(constants.SupportedEtcdVersion, cfg.KubernetesVersion)
  241. if err != nil {
  242. return true, errors.Wrap(err, "failed to retrieve an etcd version for the target Kubernetes version")
  243. }
  244. if warning != nil {
  245. klog.Warningf("[upgrade/etcd] %v", warning)
  246. }
  247. // gets the etcd version of the local/stacked etcd member running on the current machine
  248. currentEtcdVersions, err := oldEtcdClient.GetClusterVersions()
  249. if err != nil {
  250. return true, errors.Wrap(err, "failed to retrieve the current etcd version")
  251. }
  252. currentEtcdVersionStr, ok := currentEtcdVersions[etcdutil.GetClientURL(&cfg.LocalAPIEndpoint)]
  253. if !ok {
  254. return true, errors.Wrap(err, "failed to retrieve the current etcd version")
  255. }
  256. currentEtcdVersion, err := version.ParseSemantic(currentEtcdVersionStr)
  257. if err != nil {
  258. return true, errors.Wrapf(err, "failed to parse the current etcd version(%s)", currentEtcdVersionStr)
  259. }
  260. // Comparing current etcd version with desired to catch the same version or downgrade condition and fail on them.
  261. if desiredEtcdVersion.LessThan(currentEtcdVersion) {
  262. return false, errors.Errorf("the desired etcd version for this Kubernetes version %q is %q, but the current etcd version is %q. Won't downgrade etcd, instead just continue", cfg.KubernetesVersion, desiredEtcdVersion.String(), currentEtcdVersion.String())
  263. }
  264. // For the case when desired etcd version is the same as current etcd version
  265. if strings.Compare(desiredEtcdVersion.String(), currentEtcdVersion.String()) == 0 {
  266. return false, nil
  267. }
  268. beforeEtcdPodHash, err := waiter.WaitForStaticPodSingleHash(cfg.NodeRegistration.Name, constants.Etcd)
  269. if err != nil {
  270. return true, errors.Wrap(err, "failed to get etcd pod's hash")
  271. }
  272. // Write the updated etcd static Pod manifest into the temporary directory, at this point no etcd change
  273. // has occurred in any aspects.
  274. if err := etcdphase.CreateLocalEtcdStaticPodManifestFile(pathMgr.TempManifestDir(), pathMgr.KustomizeDir(), cfg.NodeRegistration.Name, &cfg.ClusterConfiguration, &cfg.LocalAPIEndpoint); err != nil {
  275. return true, errors.Wrap(err, "error creating local etcd static pod manifest file")
  276. }
  277. retries := 10
  278. retryInterval := 15 * time.Second
  279. // Perform etcd upgrade using common to all control plane components function
  280. if err := upgradeComponent(constants.Etcd, certsRenewMgr, waiter, pathMgr, cfg, beforeEtcdPodHash, recoverManifests); err != nil {
  281. fmt.Printf("[upgrade/etcd] Failed to upgrade etcd: %v\n", err)
  282. // Since upgrade component failed, the old etcd manifest has either been restored or was never touched
  283. // Now we need to check the health of etcd cluster if it is up with old manifest
  284. fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
  285. if _, err := oldEtcdClient.WaitForClusterAvailable(retries, retryInterval); err != nil {
  286. fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
  287. // At this point we know that etcd cluster is dead and it is safe to copy backup datastore and to rollback old etcd manifest
  288. fmt.Println("[upgrade/etcd] Rolling back etcd data")
  289. if err := rollbackEtcdData(cfg, pathMgr); err != nil {
  290. // Even copying back datastore failed, no options for recovery left, bailing out
  291. return true, errors.Errorf("fatal error rolling back local etcd cluster datadir: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
  292. }
  293. fmt.Println("[upgrade/etcd] Etcd data rollback successful")
  294. // Now that we've rolled back the data, let's check if the cluster comes up
  295. fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
  296. if _, err := oldEtcdClient.WaitForClusterAvailable(retries, retryInterval); err != nil {
  297. fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
  298. // Nothing else left to try to recover etcd cluster
  299. return true, errors.Wrapf(err, "fatal error rolling back local etcd cluster manifest, the backup of etcd database is stored here:(%s)", backupEtcdDir)
  300. }
  301. // We've recovered to the previous etcd from this case
  302. }
  303. fmt.Println("[upgrade/etcd] Etcd was rolled back and is now available")
  304. // Since etcd cluster came back up with the old manifest
  305. return true, errors.Wrap(err, "fatal error when trying to upgrade the etcd cluster, rolled the state back to pre-upgrade state")
  306. }
  307. // Initialize the new etcd client if it wasn't pre-initialized
  308. if newEtcdClient == nil {
  309. etcdClient, err := etcdutil.NewFromCluster(client, cfg.CertificatesDir)
  310. if err != nil {
  311. return true, errors.Wrap(err, "fatal error creating etcd client")
  312. }
  313. newEtcdClient = etcdClient
  314. }
  315. // Checking health state of etcd after the upgrade
  316. fmt.Println("[upgrade/etcd] Waiting for etcd to become available")
  317. if _, err = newEtcdClient.WaitForClusterAvailable(retries, retryInterval); err != nil {
  318. fmt.Printf("[upgrade/etcd] Failed to healthcheck etcd: %v\n", err)
  319. // Despite the fact that upgradeComponent was successful, there is something wrong with the etcd cluster
  320. // First step is to restore back up of datastore
  321. fmt.Println("[upgrade/etcd] Rolling back etcd data")
  322. if err := rollbackEtcdData(cfg, pathMgr); err != nil {
  323. // Even copying back datastore failed, no options for recovery left, bailing out
  324. return true, errors.Wrapf(err, "fatal error rolling back local etcd cluster datadir, the backup of etcd database is stored here:(%s)", backupEtcdDir)
  325. }
  326. fmt.Println("[upgrade/etcd] Etcd data rollback successful")
  327. // Old datastore has been copied, rolling back old manifests
  328. fmt.Println("[upgrade/etcd] Rolling back etcd manifest")
  329. rollbackOldManifests(recoverManifests, err, pathMgr, true)
  330. // rollbackOldManifests() always returns an error -- ignore it and continue
  331. // Assuming rollback of the old etcd manifest was successful, check the status of etcd cluster again
  332. fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
  333. if _, err := oldEtcdClient.WaitForClusterAvailable(retries, retryInterval); err != nil {
  334. fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
  335. // Nothing else left to try to recover etcd cluster
  336. return true, errors.Wrapf(err, "fatal error rolling back local etcd cluster manifest, the backup of etcd database is stored here:(%s)", backupEtcdDir)
  337. }
  338. fmt.Println("[upgrade/etcd] Etcd was rolled back and is now available")
  339. // We've successfully rolled back etcd, and now return an error describing that the upgrade failed
  340. return true, errors.Wrap(err, "fatal error upgrading local etcd cluster, rolled the state back to pre-upgrade state")
  341. }
  342. return false, nil
  343. }
  344. // StaticPodControlPlane upgrades a static pod-hosted control plane
  345. func StaticPodControlPlane(client clientset.Interface, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, etcdUpgrade, renewCerts bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) error {
  346. recoverManifests := map[string]string{}
  347. var isExternalEtcd bool
  348. beforePodHashMap, err := waiter.WaitForStaticPodControlPlaneHashes(cfg.NodeRegistration.Name)
  349. if err != nil {
  350. return err
  351. }
  352. if oldEtcdClient == nil {
  353. if cfg.Etcd.External != nil {
  354. // External etcd
  355. isExternalEtcd = true
  356. etcdClient, err := etcdutil.New(
  357. cfg.Etcd.External.Endpoints,
  358. cfg.Etcd.External.CAFile,
  359. cfg.Etcd.External.CertFile,
  360. cfg.Etcd.External.KeyFile,
  361. )
  362. if err != nil {
  363. return errors.Wrap(err, "failed to create etcd client for external etcd")
  364. }
  365. oldEtcdClient = etcdClient
  366. // Since etcd is managed externally, the new etcd client will be the same as the old client
  367. if newEtcdClient == nil {
  368. newEtcdClient = etcdClient
  369. }
  370. } else {
  371. // etcd Static Pod
  372. etcdClient, err := etcdutil.NewFromCluster(client, cfg.CertificatesDir)
  373. if err != nil {
  374. return errors.Wrap(err, "failed to create etcd client")
  375. }
  376. oldEtcdClient = etcdClient
  377. }
  378. }
  379. var certsRenewMgr *renewal.Manager
  380. if renewCerts {
  381. certsRenewMgr, err = renewal.NewManager(&cfg.ClusterConfiguration, pathMgr.KubernetesDir())
  382. if err != nil {
  383. return errors.Wrap(err, "failed to create the certificate renewal manager")
  384. }
  385. }
  386. // etcd upgrade is done prior to other control plane components
  387. if !isExternalEtcd && etcdUpgrade {
  388. // set the TLS upgrade flag for all components
  389. fmt.Printf("[upgrade/etcd] Upgrading to TLS for %s\n", constants.Etcd)
  390. // Perform etcd upgrade using common to all control plane components function
  391. fatal, err := performEtcdStaticPodUpgrade(certsRenewMgr, client, waiter, pathMgr, cfg, recoverManifests, oldEtcdClient, newEtcdClient)
  392. if err != nil {
  393. if fatal {
  394. return err
  395. }
  396. fmt.Printf("[upgrade/etcd] Non fatal issue encountered during upgrade: %v\n", err)
  397. }
  398. }
  399. // Write the updated static Pod manifests into the temporary directory
  400. fmt.Printf("[upgrade/staticpods] Writing new Static Pod manifests to %q\n", pathMgr.TempManifestDir())
  401. err = controlplane.CreateInitStaticPodManifestFiles(pathMgr.TempManifestDir(), pathMgr.KustomizeDir(), cfg)
  402. if err != nil {
  403. return errors.Wrap(err, "error creating init static pod manifest files")
  404. }
  405. for _, component := range constants.ControlPlaneComponents {
  406. if err = upgradeComponent(component, certsRenewMgr, waiter, pathMgr, cfg, beforePodHashMap[component], recoverManifests); err != nil {
  407. return err
  408. }
  409. }
  410. if renewCerts {
  411. // renew the certificate embedded in the admin.conf file
  412. renewed, err := certsRenewMgr.RenewUsingLocalCA(constants.AdminKubeConfigFileName)
  413. if err != nil {
  414. return rollbackOldManifests(recoverManifests, errors.Wrapf(err, "failed to upgrade the %s certificates", constants.AdminKubeConfigFileName), pathMgr, false)
  415. }
  416. if !renewed {
  417. // if not error, but not renewed because of external CA detected, inform the user
  418. fmt.Printf("[upgrade/staticpods] External CA detected, %s certificate can't be renewed\n", constants.AdminKubeConfigFileName)
  419. }
  420. }
  421. // Remove the temporary directories used on a best-effort (don't fail if the calls error out)
  422. // The calls are set here by design; we should _not_ use "defer" above as that would remove the directories
  423. // even in the "fail and rollback" case, where we want the directories preserved for the user.
  424. return pathMgr.CleanupDirs()
  425. }
  426. // rollbackOldManifests rolls back the backed-up manifests if something went wrong.
  427. // It always returns an error to the caller.
  428. func rollbackOldManifests(oldManifests map[string]string, origErr error, pathMgr StaticPodPathManager, restoreEtcd bool) error {
  429. errs := []error{origErr}
  430. for component, backupPath := range oldManifests {
  431. // Will restore etcd manifest only if it was explicitly requested by setting restoreEtcd to True
  432. if component == constants.Etcd && !restoreEtcd {
  433. continue
  434. }
  435. // Where we should put back the backed up manifest
  436. realManifestPath := pathMgr.RealManifestPath(component)
  437. // Move the backup manifest back into the manifests directory
  438. err := pathMgr.MoveFile(backupPath, realManifestPath)
  439. if err != nil {
  440. errs = append(errs, err)
  441. }
  442. }
  443. // Let the user know there were problems, but we tried to recover
  444. return errors.Wrap(utilerrors.NewAggregate(errs),
  445. "couldn't upgrade control plane. kubeadm has tried to recover everything into the earlier state. Errors faced")
  446. }
  447. // rollbackEtcdData rolls back the content of etcd folder if something went wrong.
  448. // When the folder contents are successfully rolled back, nil is returned, otherwise an error is returned.
  449. func rollbackEtcdData(cfg *kubeadmapi.InitConfiguration, pathMgr StaticPodPathManager) error {
  450. backupEtcdDir := pathMgr.BackupEtcdDir()
  451. runningEtcdDir := cfg.Etcd.Local.DataDir
  452. if err := kubeadmutil.CopyDir(backupEtcdDir, runningEtcdDir); err != nil {
  453. // Let the user know there we're problems, but we tried to reçover
  454. return errors.Wrapf(err, "couldn't recover etcd database with error, the location of etcd backup: %s ", backupEtcdDir)
  455. }
  456. return nil
  457. }
  458. // renewCertsByComponent takes charge of renewing certificates used by a specific component before
  459. // the static pod of the component is upgraded
  460. func renewCertsByComponent(cfg *kubeadmapi.InitConfiguration, component string, certsRenewMgr *renewal.Manager) error {
  461. var certificates []string
  462. // if etcd, only in case of local etcd, renew server, peer and health check certificate
  463. if component == constants.Etcd {
  464. if cfg.Etcd.Local != nil {
  465. certificates = []string{
  466. certsphase.KubeadmCertEtcdServer.Name,
  467. certsphase.KubeadmCertEtcdPeer.Name,
  468. certsphase.KubeadmCertEtcdHealthcheck.Name,
  469. }
  470. }
  471. }
  472. // if apiserver, renew apiserver serving certificate, kubelet and front-proxy client certificate.
  473. //if local etcd, renew also the etcd client certificate
  474. if component == constants.KubeAPIServer {
  475. certificates = []string{
  476. certsphase.KubeadmCertAPIServer.Name,
  477. certsphase.KubeadmCertKubeletClient.Name,
  478. certsphase.KubeadmCertFrontProxyClient.Name,
  479. }
  480. if cfg.Etcd.Local != nil {
  481. certificates = append(certificates, certsphase.KubeadmCertEtcdAPIClient.Name)
  482. }
  483. }
  484. // if controller-manager, renew the certificate embedded in the controller-manager kubeConfig file
  485. if component == constants.KubeControllerManager {
  486. certificates = []string{
  487. constants.ControllerManagerKubeConfigFileName,
  488. }
  489. }
  490. // if scheduler, renew the certificate embedded in the scheduler kubeConfig file
  491. if component == constants.KubeScheduler {
  492. certificates = []string{
  493. constants.SchedulerKubeConfigFileName,
  494. }
  495. }
  496. // renew the selected components
  497. for _, cert := range certificates {
  498. fmt.Printf("[upgrade/staticpods] Renewing %s certificate\n", cert)
  499. renewed, err := certsRenewMgr.RenewUsingLocalCA(cert)
  500. if err != nil {
  501. return err
  502. }
  503. if !renewed {
  504. // if not error, but not renewed because of external CA detected, inform the user
  505. fmt.Printf("[upgrade/staticpods] External CA detected, %s certificate can't be renewed\n", cert)
  506. }
  507. }
  508. return nil
  509. }
  510. // GetPathManagerForUpgrade returns a path manager properly configured for the given InitConfiguration.
  511. func GetPathManagerForUpgrade(kubernetesDir, kustomizeDir string, internalcfg *kubeadmapi.InitConfiguration, etcdUpgrade bool) (StaticPodPathManager, error) {
  512. isExternalEtcd := internalcfg.Etcd.External != nil
  513. return NewKubeStaticPodPathManagerUsingTempDirs(kubernetesDir, kustomizeDir, true, etcdUpgrade && !isExternalEtcd)
  514. }
  515. // PerformStaticPodUpgrade performs the upgrade of the control plane components for a static pod hosted cluster
  516. func PerformStaticPodUpgrade(client clientset.Interface, waiter apiclient.Waiter, internalcfg *kubeadmapi.InitConfiguration, etcdUpgrade, renewCerts bool, kustomizeDir string) error {
  517. pathManager, err := GetPathManagerForUpgrade(constants.KubernetesDir, kustomizeDir, internalcfg, etcdUpgrade)
  518. if err != nil {
  519. return err
  520. }
  521. // The arguments oldEtcdClient and newEtdClient, are uninitialized because passing in the clients allow for mocking the client during testing
  522. return StaticPodControlPlane(client, waiter, pathManager, internalcfg, etcdUpgrade, renewCerts, nil, nil)
  523. }
  524. // DryRunStaticPodUpgrade fakes an upgrade of the control plane
  525. func DryRunStaticPodUpgrade(kustomizeDir string, internalcfg *kubeadmapi.InitConfiguration) error {
  526. dryRunManifestDir, err := constants.CreateTempDirForKubeadm("", "kubeadm-upgrade-dryrun")
  527. if err != nil {
  528. return err
  529. }
  530. defer os.RemoveAll(dryRunManifestDir)
  531. if err := controlplane.CreateInitStaticPodManifestFiles(dryRunManifestDir, kustomizeDir, internalcfg); err != nil {
  532. return err
  533. }
  534. // Print the contents of the upgraded manifests and pretend like they were in /etc/kubernetes/manifests
  535. files := []dryrunutil.FileToPrint{}
  536. for _, component := range constants.ControlPlaneComponents {
  537. realPath := constants.GetStaticPodFilepath(component, dryRunManifestDir)
  538. outputPath := constants.GetStaticPodFilepath(component, constants.GetStaticPodDirectory())
  539. files = append(files, dryrunutil.NewFileToPrint(realPath, outputPath))
  540. }
  541. return dryrunutil.PrintDryRunFiles(files, os.Stdout)
  542. }