configsync.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package kubeletconfig
  14. import (
  15. "fmt"
  16. "os"
  17. "time"
  18. "k8s.io/klog"
  19. apiv1 "k8s.io/api/core/v1"
  20. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  21. "k8s.io/apimachinery/pkg/types"
  22. clientset "k8s.io/client-go/kubernetes"
  23. v1core "k8s.io/client-go/kubernetes/typed/core/v1"
  24. "k8s.io/client-go/tools/cache"
  25. "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/checkpoint"
  26. "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/status"
  27. utillog "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/log"
  28. )
const (
	// KubeletConfigChangedEventReason identifies an event as a change of Kubelet configuration.
	// It is used as the Reason of the event sent just before the Kubelet restarts.
	KubeletConfigChangedEventReason = "KubeletConfigChanged"
	// LocalEventMessage is sent when the Kubelet restarts to use local config
	// (i.e. when no remote config source is given to restartForNewConfig).
	LocalEventMessage = "Kubelet restarting to use local config"
	// RemoteEventMessageFmt is sent when the Kubelet restarts to use a remote config.
	// Its four %s verbs are filled, in order, with the source's APIPath(), UID(),
	// ResourceVersion(), and KubeletFilename().
	RemoteEventMessageFmt = "Kubelet restarting to use %s, UID: %s, ResourceVersion: %s, KubeletConfigKey: %s"
)
// pokeConfigSourceWorker tells the worker thread that syncs config sources that work needs to be done.
// The send on cc.pendingConfigSource is non-blocking: if the channel cannot accept the value right
// now, the signal is dropped instead of waiting.
func (cc *Controller) pokeConfigSourceWorker() {
	select {
	case cc.pendingConfigSource <- true:
	default:
	}
}
  44. // syncConfigSource checks if work needs to be done to use a new configuration, and does that work if necessary
  45. func (cc *Controller) syncConfigSource(client clientset.Interface, eventClient v1core.EventsGetter, nodeName string) {
  46. select {
  47. case <-cc.pendingConfigSource:
  48. default:
  49. // no work to be done, return
  50. return
  51. }
  52. // if the sync fails, we want to retry
  53. var syncerr error
  54. defer func() {
  55. if syncerr != nil {
  56. utillog.Errorf(syncerr.Error())
  57. cc.pokeConfigSourceWorker()
  58. }
  59. }()
  60. // get the latest Node.Spec.ConfigSource from the informer
  61. source, err := latestNodeConfigSource(cc.nodeInformer.GetStore(), nodeName)
  62. if err != nil {
  63. cc.configStatus.SetErrorOverride(fmt.Sprintf(status.SyncErrorFmt, status.InternalError))
  64. syncerr = fmt.Errorf("%s, error: %v", status.InternalError, err)
  65. return
  66. }
  67. // a nil source simply means we reset to local defaults
  68. if source == nil {
  69. utillog.Infof("Node.Spec.ConfigSource is empty, will reset assigned and last-known-good to defaults")
  70. if updated, reason, err := cc.resetConfig(); err != nil {
  71. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  72. cc.configStatus.SetErrorOverride(reason)
  73. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  74. return
  75. } else if updated {
  76. restartForNewConfig(eventClient, nodeName, nil)
  77. }
  78. return
  79. }
  80. // a non-nil source means we should attempt to download the config, and checkpoint it if necessary
  81. utillog.Infof("Node.Spec.ConfigSource is non-empty, will checkpoint source and update config if necessary")
  82. // TODO(mtaufen): It would be nice if we could check the payload's metadata before (re)downloading the whole payload
  83. // we at least try pulling the latest configmap out of the local informer store.
  84. // construct the interface that can dynamically dispatch the correct Download, etc. methods for the given source type
  85. remote, reason, err := checkpoint.NewRemoteConfigSource(source)
  86. if err != nil {
  87. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  88. cc.configStatus.SetErrorOverride(reason)
  89. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  90. return
  91. }
  92. // "download" source, either from informer's in-memory store or directly from the API server, if the informer doesn't have a copy
  93. payload, reason, err := cc.downloadConfigPayload(client, remote)
  94. if err != nil {
  95. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  96. cc.configStatus.SetErrorOverride(reason)
  97. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  98. return
  99. }
  100. // save a checkpoint for the payload, if one does not already exist
  101. if reason, err := cc.saveConfigCheckpoint(remote, payload); err != nil {
  102. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  103. cc.configStatus.SetErrorOverride(reason)
  104. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  105. return
  106. }
  107. // update the local, persistent record of assigned config
  108. if updated, reason, err := cc.setAssignedConfig(remote); err != nil {
  109. reason = fmt.Sprintf(status.SyncErrorFmt, reason)
  110. cc.configStatus.SetErrorOverride(reason)
  111. syncerr = fmt.Errorf("%s, error: %v", reason, err)
  112. return
  113. } else if updated {
  114. restartForNewConfig(eventClient, nodeName, remote)
  115. }
  116. // If we get here:
  117. // - there is no need to restart to use new config
  118. // - there was no error trying to sync configuration
  119. // - if, previously, there was an error trying to sync configuration, we need to clear that error from the status
  120. cc.configStatus.SetErrorOverride("")
  121. }
  122. // Note: source has up-to-date uid and resourceVersion after calling downloadConfigPayload.
  123. func (cc *Controller) downloadConfigPayload(client clientset.Interface, source checkpoint.RemoteConfigSource) (checkpoint.Payload, string, error) {
  124. var store cache.Store
  125. if cc.remoteConfigSourceInformer != nil {
  126. store = cc.remoteConfigSourceInformer.GetStore()
  127. }
  128. return source.Download(client, store)
  129. }
  130. func (cc *Controller) saveConfigCheckpoint(source checkpoint.RemoteConfigSource, payload checkpoint.Payload) (string, error) {
  131. ok, err := cc.checkpointStore.Exists(source)
  132. if err != nil {
  133. return status.InternalError, fmt.Errorf("%s, error: %v", status.InternalError, err)
  134. }
  135. if ok {
  136. utillog.Infof("checkpoint already exists for %s, UID: %s, ResourceVersion: %s", source.APIPath(), payload.UID(), payload.ResourceVersion())
  137. return "", nil
  138. }
  139. if err := cc.checkpointStore.Save(payload); err != nil {
  140. return status.InternalError, fmt.Errorf("%s, error: %v", status.InternalError, err)
  141. }
  142. return "", nil
  143. }
  144. // setAssignedConfig updates the assigned checkpoint config in the store.
  145. // Returns whether the assigned config changed as a result, or a sanitized failure reason and an error.
  146. func (cc *Controller) setAssignedConfig(source checkpoint.RemoteConfigSource) (bool, string, error) {
  147. assigned, err := cc.checkpointStore.Assigned()
  148. if err != nil {
  149. return false, status.InternalError, err
  150. }
  151. if err := cc.checkpointStore.SetAssigned(source); err != nil {
  152. return false, status.InternalError, err
  153. }
  154. return !checkpoint.EqualRemoteConfigSources(assigned, source), "", nil
  155. }
  156. // resetConfig resets the assigned and last-known-good checkpoints in the checkpoint store to their default values and
  157. // returns whether the assigned checkpoint changed as a result, or a sanitized failure reason and an error.
  158. func (cc *Controller) resetConfig() (bool, string, error) {
  159. updated, err := cc.checkpointStore.Reset()
  160. if err != nil {
  161. return false, status.InternalError, err
  162. }
  163. return updated, "", nil
  164. }
  165. // restartForNewConfig presumes the Kubelet is managed by a babysitter, e.g. systemd
  166. // It will send an event before exiting.
  167. func restartForNewConfig(eventClient v1core.EventsGetter, nodeName string, source checkpoint.RemoteConfigSource) {
  168. message := LocalEventMessage
  169. if source != nil {
  170. message = fmt.Sprintf(RemoteEventMessageFmt, source.APIPath(), source.UID(), source.ResourceVersion(), source.KubeletFilename())
  171. }
  172. // we directly log and send the event, instead of using the event recorder,
  173. // because the event recorder won't flush its queue before we exit (we'd lose the event)
  174. event := makeEvent(nodeName, apiv1.EventTypeNormal, KubeletConfigChangedEventReason, message)
  175. klog.V(3).Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
  176. if _, err := eventClient.Events(apiv1.NamespaceDefault).Create(event); err != nil {
  177. utillog.Errorf("failed to send event, error: %v", err)
  178. }
  179. utillog.Infof(message)
  180. os.Exit(0)
  181. }
  182. // latestNodeConfigSource returns a copy of the most recent NodeConfigSource from the Node with `nodeName` in `store`
  183. func latestNodeConfigSource(store cache.Store, nodeName string) (*apiv1.NodeConfigSource, error) {
  184. obj, ok, err := store.GetByKey(nodeName)
  185. if err != nil {
  186. err := fmt.Errorf("failed to retrieve Node %q from informer's store, error: %v", nodeName, err)
  187. utillog.Errorf(err.Error())
  188. return nil, err
  189. } else if !ok {
  190. err := fmt.Errorf("Node %q does not exist in the informer's store, can't sync config source", nodeName)
  191. utillog.Errorf(err.Error())
  192. return nil, err
  193. }
  194. node, ok := obj.(*apiv1.Node)
  195. if !ok {
  196. err := fmt.Errorf("failed to cast object from informer's store to Node, can't sync config source for Node %q", nodeName)
  197. utillog.Errorf(err.Error())
  198. return nil, err
  199. }
  200. // Copy the source, so anyone who modifies it after here doesn't mess up the informer's store!
  201. // This was previously the cause of a bug that made the Kubelet frequently resync config; Download updated
  202. // the UID and ResourceVersion on the NodeConfigSource, but the pointer was still drilling all the way
  203. // into the informer's copy!
  204. return node.Spec.ConfigSource.DeepCopy(), nil
  205. }
  206. // makeEvent constructs an event
  207. // similar to makeEvent in k8s.io/client-go/tools/record/event.go
  208. func makeEvent(nodeName, eventtype, reason, message string) *apiv1.Event {
  209. const componentKubelet = "kubelet"
  210. // NOTE(mtaufen): This is consistent with pkg/kubelet/kubelet.go. Even though setting the node
  211. // name as the UID looks strange, it appears to be conventional for events sent by the Kubelet.
  212. ref := apiv1.ObjectReference{
  213. Kind: "Node",
  214. Name: nodeName,
  215. UID: types.UID(nodeName),
  216. Namespace: "",
  217. }
  218. t := metav1.Time{Time: time.Now()}
  219. namespace := ref.Namespace
  220. if namespace == "" {
  221. namespace = metav1.NamespaceDefault
  222. }
  223. return &apiv1.Event{
  224. ObjectMeta: metav1.ObjectMeta{
  225. Name: fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
  226. Namespace: namespace,
  227. },
  228. InvolvedObject: ref,
  229. Reason: reason,
  230. Message: message,
  231. FirstTimestamp: t,
  232. LastTimestamp: t,
  233. Count: 1,
  234. Type: eventtype,
  235. Source: apiv1.EventSource{Component: componentKubelet, Host: string(nodeName)},
  236. }
  237. }