configsync.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package kubeletconfig
  14. import (
  15. "context"
  16. "fmt"
  17. "os"
  18. "time"
  19. "k8s.io/klog"
  20. apiv1 "k8s.io/api/core/v1"
  21. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  22. "k8s.io/apimachinery/pkg/types"
  23. clientset "k8s.io/client-go/kubernetes"
  24. v1core "k8s.io/client-go/kubernetes/typed/core/v1"
  25. "k8s.io/client-go/tools/cache"
  26. "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/checkpoint"
  27. "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/status"
  28. utillog "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/log"
  29. )
  30. const (
  31. // KubeletConfigChangedEventReason identifies an event as a change of Kubelet configuration
  32. KubeletConfigChangedEventReason = "KubeletConfigChanged"
  33. // LocalEventMessage is sent when the Kubelet restarts to use local config
  34. LocalEventMessage = "Kubelet restarting to use local config"
  35. // RemoteEventMessageFmt is sent when the Kubelet restarts to use a remote config
  36. RemoteEventMessageFmt = "Kubelet restarting to use %s, UID: %s, ResourceVersion: %s, KubeletConfigKey: %s"
  37. )
  38. // pokeConfiSourceWorker tells the worker thread that syncs config sources that work needs to be done
  39. func (cc *Controller) pokeConfigSourceWorker() {
  40. select {
  41. case cc.pendingConfigSource <- true:
  42. default:
  43. }
  44. }
  45. // syncConfigSource checks if work needs to be done to use a new configuration, and does that work if necessary
  46. func (cc *Controller) syncConfigSource(client clientset.Interface, eventClient v1core.EventsGetter, nodeName string) {
  47. select {
  48. case <-cc.pendingConfigSource:
  49. default:
  50. // no work to be done, return
  51. return
  52. }
  53. // if the sync fails, we want to retry
  54. var syncerr error
  55. defer func() {
  56. if syncerr != nil {
  57. utillog.Errorf(syncerr.Error())
  58. cc.pokeConfigSourceWorker()
  59. }
  60. }()
  61. // get the latest Node.Spec.ConfigSource from the informer
  62. source, err := latestNodeConfigSource(cc.nodeInformer.GetStore(), nodeName)
  63. if err != nil {
  64. cc.configStatus.SetErrorOverride(fmt.Sprintf(status.SyncErrorFmt, status.InternalError))
  65. syncerr = fmt.Errorf("%s, error: %v", status.InternalError, err)
  66. return
  67. }
  68. // a nil source simply means we reset to local defaults
  69. if source == nil {
  70. utillog.Infof("Node.Spec.ConfigSource is empty, will reset assigned and last-known-good to defaults")
  71. if updated, reason, err := cc.resetConfig(); err != nil {
  72. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  73. cc.configStatus.SetErrorOverride(reason)
  74. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  75. return
  76. } else if updated {
  77. restartForNewConfig(eventClient, nodeName, nil)
  78. }
  79. return
  80. }
  81. // a non-nil source means we should attempt to download the config, and checkpoint it if necessary
  82. utillog.Infof("Node.Spec.ConfigSource is non-empty, will checkpoint source and update config if necessary")
  83. // TODO(mtaufen): It would be nice if we could check the payload's metadata before (re)downloading the whole payload
  84. // we at least try pulling the latest configmap out of the local informer store.
  85. // construct the interface that can dynamically dispatch the correct Download, etc. methods for the given source type
  86. remote, reason, err := checkpoint.NewRemoteConfigSource(source)
  87. if err != nil {
  88. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  89. cc.configStatus.SetErrorOverride(reason)
  90. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  91. return
  92. }
  93. // "download" source, either from informer's in-memory store or directly from the API server, if the informer doesn't have a copy
  94. payload, reason, err := cc.downloadConfigPayload(client, remote)
  95. if err != nil {
  96. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  97. cc.configStatus.SetErrorOverride(reason)
  98. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  99. return
  100. }
  101. // save a checkpoint for the payload, if one does not already exist
  102. if reason, err := cc.saveConfigCheckpoint(remote, payload); err != nil {
  103. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  104. cc.configStatus.SetErrorOverride(reason)
  105. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  106. return
  107. }
  108. // update the local, persistent record of assigned config
  109. if updated, reason, err := cc.setAssignedConfig(remote); err != nil {
  110. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  111. cc.configStatus.SetErrorOverride(reason)
  112. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  113. return
  114. } else if updated {
  115. restartForNewConfig(eventClient, nodeName, remote)
  116. }
  117. // If we get here:
  118. // - there is no need to restart to use new config
  119. // - there was no error trying to sync configuration
  120. // - if, previously, there was an error trying to sync configuration, we need to clear that error from the status
  121. cc.configStatus.SetErrorOverride("")
  122. }
  123. // Note: source has up-to-date uid and resourceVersion after calling downloadConfigPayload.
  124. func (cc *Controller) downloadConfigPayload(client clientset.Interface, source checkpoint.RemoteConfigSource) (checkpoint.Payload, string, error) {
  125. var store cache.Store
  126. if cc.remoteConfigSourceInformer != nil {
  127. store = cc.remoteConfigSourceInformer.GetStore()
  128. }
  129. return source.Download(client, store)
  130. }
  131. func (cc *Controller) saveConfigCheckpoint(source checkpoint.RemoteConfigSource, payload checkpoint.Payload) (string, error) {
  132. ok, err := cc.checkpointStore.Exists(source)
  133. if err != nil {
  134. return status.InternalError, fmt.Errorf("%s, error: %v", status.InternalError, err)
  135. }
  136. if ok {
  137. utillog.Infof("checkpoint already exists for %s, UID: %s, ResourceVersion: %s", source.APIPath(), payload.UID(), payload.ResourceVersion())
  138. return "", nil
  139. }
  140. if err := cc.checkpointStore.Save(payload); err != nil {
  141. return status.InternalError, fmt.Errorf("%s, error: %v", status.InternalError, err)
  142. }
  143. return "", nil
  144. }
  145. // setAssignedConfig updates the assigned checkpoint config in the store.
  146. // Returns whether the assigned config changed as a result, or a sanitized failure reason and an error.
  147. func (cc *Controller) setAssignedConfig(source checkpoint.RemoteConfigSource) (bool, string, error) {
  148. assigned, err := cc.checkpointStore.Assigned()
  149. if err != nil {
  150. return false, status.InternalError, err
  151. }
  152. if err := cc.checkpointStore.SetAssigned(source); err != nil {
  153. return false, status.InternalError, err
  154. }
  155. return !checkpoint.EqualRemoteConfigSources(assigned, source), "", nil
  156. }
  157. // resetConfig resets the assigned and last-known-good checkpoints in the checkpoint store to their default values and
  158. // returns whether the assigned checkpoint changed as a result, or a sanitized failure reason and an error.
  159. func (cc *Controller) resetConfig() (bool, string, error) {
  160. updated, err := cc.checkpointStore.Reset()
  161. if err != nil {
  162. return false, status.InternalError, err
  163. }
  164. return updated, "", nil
  165. }
  166. // restartForNewConfig presumes the Kubelet is managed by a babysitter, e.g. systemd
  167. // It will send an event before exiting.
  168. func restartForNewConfig(eventClient v1core.EventsGetter, nodeName string, source checkpoint.RemoteConfigSource) {
  169. message := LocalEventMessage
  170. if source != nil {
  171. message = fmt.Sprintf(RemoteEventMessageFmt, source.APIPath(), source.UID(), source.ResourceVersion(), source.KubeletFilename())
  172. }
  173. // we directly log and send the event, instead of using the event recorder,
  174. // because the event recorder won't flush its queue before we exit (we'd lose the event)
  175. event := makeEvent(nodeName, apiv1.EventTypeNormal, KubeletConfigChangedEventReason, message)
  176. klog.V(3).Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
  177. if _, err := eventClient.Events(apiv1.NamespaceDefault).Create(context.TODO(), event, metav1.CreateOptions{}); err != nil {
  178. utillog.Errorf("failed to send event, error: %v", err)
  179. }
  180. utillog.Infof(message)
  181. os.Exit(0)
  182. }
  183. // latestNodeConfigSource returns a copy of the most recent NodeConfigSource from the Node with `nodeName` in `store`
  184. func latestNodeConfigSource(store cache.Store, nodeName string) (*apiv1.NodeConfigSource, error) {
  185. obj, ok, err := store.GetByKey(nodeName)
  186. if err != nil {
  187. err := fmt.Errorf("failed to retrieve Node %q from informer's store, error: %v", nodeName, err)
  188. utillog.Errorf(err.Error())
  189. return nil, err
  190. } else if !ok {
  191. err := fmt.Errorf("node %q does not exist in the informer's store, can't sync config source", nodeName)
  192. utillog.Errorf(err.Error())
  193. return nil, err
  194. }
  195. node, ok := obj.(*apiv1.Node)
  196. if !ok {
  197. err := fmt.Errorf("failed to cast object from informer's store to Node, can't sync config source for Node %q", nodeName)
  198. utillog.Errorf(err.Error())
  199. return nil, err
  200. }
  201. // Copy the source, so anyone who modifies it after here doesn't mess up the informer's store!
  202. // This was previously the cause of a bug that made the Kubelet frequently resync config; Download updated
  203. // the UID and ResourceVersion on the NodeConfigSource, but the pointer was still drilling all the way
  204. // into the informer's copy!
  205. return node.Spec.ConfigSource.DeepCopy(), nil
  206. }
  207. // makeEvent constructs an event
  208. // similar to makeEvent in k8s.io/client-go/tools/record/event.go
  209. func makeEvent(nodeName, eventtype, reason, message string) *apiv1.Event {
  210. const componentKubelet = "kubelet"
  211. // NOTE(mtaufen): This is consistent with pkg/kubelet/kubelet.go. Even though setting the node
  212. // name as the UID looks strange, it appears to be conventional for events sent by the Kubelet.
  213. ref := apiv1.ObjectReference{
  214. Kind: "Node",
  215. Name: nodeName,
  216. UID: types.UID(nodeName),
  217. Namespace: "",
  218. }
  219. t := metav1.Time{Time: time.Now()}
  220. namespace := ref.Namespace
  221. if namespace == "" {
  222. namespace = metav1.NamespaceDefault
  223. }
  224. return &apiv1.Event{
  225. ObjectMeta: metav1.ObjectMeta{
  226. Name: fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
  227. Namespace: namespace,
  228. },
  229. InvolvedObject: ref,
  230. Reason: reason,
  231. Message: message,
  232. FirstTimestamp: t,
  233. LastTimestamp: t,
  234. Count: 1,
  235. Type: eventtype,
  236. Source: apiv1.EventSource{Component: componentKubelet, Host: string(nodeName)},
  237. }
  238. }