kubelet_node_status.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
    "context"
    "fmt"
    "net"
    goruntime "runtime"
    "sort"
    "time"

    "k8s.io/klog"

    v1 "k8s.io/api/core/v1"
    apiequality "k8s.io/apimachinery/pkg/api/equality"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    cloudprovider "k8s.io/cloud-provider"
    k8s_api_v1 "k8s.io/kubernetes/pkg/apis/core/v1"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    "k8s.io/kubernetes/pkg/features"
    kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
    "k8s.io/kubernetes/pkg/kubelet/events"
    "k8s.io/kubernetes/pkg/kubelet/nodestatus"
    "k8s.io/kubernetes/pkg/kubelet/util"
    schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
    nodeutil "k8s.io/kubernetes/pkg/util/node"
    taintutil "k8s.io/kubernetes/pkg/util/taints"
    volutil "k8s.io/kubernetes/pkg/volume/util"
)

// registerWithAPIServer registers the node with the cluster master. It is safe
// to call multiple times, but not concurrently (kl.registrationCompleted is
// not locked).
func (kl *Kubelet) registerWithAPIServer() {
    if kl.registrationCompleted {
        return
    }
    step := 100 * time.Millisecond

    for {
        time.Sleep(step)
        step = step * 2
        if step >= 7*time.Second {
            step = 7 * time.Second
        }

        node, err := kl.initialNode()
        if err != nil {
            klog.Errorf("Unable to construct v1.Node object for kubelet: %v", err)
            continue
        }

        klog.Infof("Attempting to register node %s", node.Name)
        registered := kl.tryRegisterWithAPIServer(node)
        if registered {
            klog.Infof("Successfully registered node %s", node.Name)
            kl.registrationCompleted = true
            return
        }
    }
}

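// Note on the loop above (descriptive comment, not part of the original file):
// the sleep happens before every attempt with a capped exponential backoff, so
// registration attempts are spaced roughly 100ms, 200ms, 400ms, ..., 6.4s, and
// then every 7s until one of them succeeds.
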
// tryRegisterWithAPIServer makes an attempt to register the given node with
// the API server, returning a boolean indicating whether the attempt was
// successful. If a node with the same name already exists, it reconciles the
// value of the annotation for controller-managed attach-detach of attachable
// persistent volumes for the node.
func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
    _, err := kl.kubeClient.CoreV1().Nodes().Create(node)
    if err == nil {
        return true
    }

    if !apierrors.IsAlreadyExists(err) {
        klog.Errorf("Unable to register node %q with API server: %v", kl.nodeName, err)
        return false
    }

    existingNode, err := kl.kubeClient.CoreV1().Nodes().Get(string(kl.nodeName), metav1.GetOptions{})
    if err != nil {
        klog.Errorf("Unable to register node %q with API server: error getting existing node: %v", kl.nodeName, err)
        return false
    }
    if existingNode == nil {
        klog.Errorf("Unable to register node %q with API server: no node instance returned", kl.nodeName)
        return false
    }

    originalNode := existingNode.DeepCopy()
    if originalNode == nil {
        klog.Errorf("Nil %q node object", kl.nodeName)
        return false
    }

    klog.Infof("Node %s was previously registered", kl.nodeName)

    // Edge case: the node was previously registered; reconcile
    // the value of the controller-managed attach-detach
    // annotation.
    requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
    requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
    requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
    if requiresUpdate {
        if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
            klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
            return false
        }
    }

    return true
}

// Zeros out extended resource capacity during reconciliation.
func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
    requiresUpdate := false
    // Check with the device manager to see if node has been recreated, in which case
    // extended resources should be zeroed until they are available.
    if kl.containerManager.ShouldResetExtendedResourceCapacity() {
        for k := range node.Status.Capacity {
            if v1helper.IsExtendedResourceName(k) {
                klog.Infof("Zero out resource %s capacity in existing node.", k)
                node.Status.Capacity[k] = *resource.NewQuantity(int64(0), resource.DecimalSI)
                node.Status.Allocatable[k] = *resource.NewQuantity(int64(0), resource.DecimalSI)
                requiresUpdate = true
            }
        }
    }
    return requiresUpdate
}

// updateDefaultLabels will set the default labels on the node
func (kl *Kubelet) updateDefaultLabels(initialNode, existingNode *v1.Node) bool {
    defaultLabels := []string{
        v1.LabelHostname,
        v1.LabelZoneFailureDomain,
        v1.LabelZoneRegion,
        v1.LabelInstanceType,
        v1.LabelOSStable,
        v1.LabelArchStable,
        kubeletapis.LabelOS,
        kubeletapis.LabelArch,
    }

    needsUpdate := false
    if existingNode.Labels == nil {
        existingNode.Labels = make(map[string]string)
    }
    // Set default labels but make sure to not set labels with empty values.
    for _, label := range defaultLabels {
        if _, hasInitialValue := initialNode.Labels[label]; !hasInitialValue {
            continue
        }

        if existingNode.Labels[label] != initialNode.Labels[label] {
            existingNode.Labels[label] = initialNode.Labels[label]
            needsUpdate = true
        }

        if existingNode.Labels[label] == "" {
            delete(existingNode.Labels, label)
        }
    }

    return needsUpdate
}

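// A minimal usage sketch for the reconciliation above (illustrative only, not
// part of the original file; kl is assumed to be a configured *Kubelet):
//
//	initial := &v1.Node{ObjectMeta: metav1.ObjectMeta{
//		Labels: map[string]string{v1.LabelHostname: "node-a"},
//	}}
//	existing := &v1.Node{ObjectMeta: metav1.ObjectMeta{
//		Labels: map[string]string{v1.LabelHostname: "node-old", "example.com/custom": "kept"},
//	}}
//	changed := kl.updateDefaultLabels(initial, existing)
//	// changed == true; existing now carries kubernetes.io/hostname=node-a,
//	// while the unrelated example.com/custom label is left untouched.
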
// reconcileCMADAnnotationWithExistingNode reconciles the controller-managed
// attach-detach annotation on a new node and the existing node, returning
// whether the existing node must be updated.
func (kl *Kubelet) reconcileCMADAnnotationWithExistingNode(node, existingNode *v1.Node) bool {
    var (
        existingCMAAnnotation    = existingNode.Annotations[volutil.ControllerManagedAttachAnnotation]
        newCMAAnnotation, newSet = node.Annotations[volutil.ControllerManagedAttachAnnotation]
    )

    if newCMAAnnotation == existingCMAAnnotation {
        return false
    }

    // If the just-constructed node and the existing node do
    // not have the same value, update the existing node with
    // the correct value of the annotation.
    if !newSet {
        klog.Info("Controller attach-detach setting changed to false; updating existing Node")
        delete(existingNode.Annotations, volutil.ControllerManagedAttachAnnotation)
    } else {
        klog.Info("Controller attach-detach setting changed to true; updating existing Node")
        if existingNode.Annotations == nil {
            existingNode.Annotations = make(map[string]string)
        }
        existingNode.Annotations[volutil.ControllerManagedAttachAnnotation] = newCMAAnnotation
    }

    return true
}

// initialNode constructs the initial v1.Node for this Kubelet, incorporating node
// labels, information from the cloud provider, and Kubelet configuration.
func (kl *Kubelet) initialNode() (*v1.Node, error) {
    node := &v1.Node{
        ObjectMeta: metav1.ObjectMeta{
            Name: string(kl.nodeName),
            Labels: map[string]string{
                v1.LabelHostname:      kl.hostname,
                v1.LabelOSStable:      goruntime.GOOS,
                v1.LabelArchStable:    goruntime.GOARCH,
                kubeletapis.LabelOS:   goruntime.GOOS,
                kubeletapis.LabelArch: goruntime.GOARCH,
            },
        },
        Spec: v1.NodeSpec{
            Unschedulable: !kl.registerSchedulable,
        },
    }

    nodeTaints := make([]v1.Taint, 0)
    if len(kl.registerWithTaints) > 0 {
        taints := make([]v1.Taint, len(kl.registerWithTaints))
        for i := range kl.registerWithTaints {
            if err := k8s_api_v1.Convert_core_Taint_To_v1_Taint(&kl.registerWithTaints[i], &taints[i], nil); err != nil {
                return nil, err
            }
        }
        nodeTaints = append(nodeTaints, taints...)
    }

    unschedulableTaint := v1.Taint{
        Key:    schedulerapi.TaintNodeUnschedulable,
        Effect: v1.TaintEffectNoSchedule,
    }

    // If TaintNodesByCondition is enabled, taint the node with TaintNodeUnschedulable when
    // initializing the node to avoid a race condition; refer to #63897 for more detail.
    if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
        if node.Spec.Unschedulable &&
            !taintutil.TaintExists(nodeTaints, &unschedulableTaint) {
            nodeTaints = append(nodeTaints, unschedulableTaint)
        }
    }

    if kl.externalCloudProvider {
        taint := v1.Taint{
            Key:    schedulerapi.TaintExternalCloudProvider,
            Value:  "true",
            Effect: v1.TaintEffectNoSchedule,
        }

        nodeTaints = append(nodeTaints, taint)
    }
    if len(nodeTaints) > 0 {
        node.Spec.Taints = nodeTaints
    }

    // Initially, set NodeNetworkUnavailable to true.
    if kl.providerRequiresNetworkingConfiguration() {
        node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
            Type:               v1.NodeNetworkUnavailable,
            Status:             v1.ConditionTrue,
            Reason:             "NoRouteCreated",
            Message:            "Node created without a route",
            LastTransitionTime: metav1.NewTime(kl.clock.Now()),
        })
    }

    if kl.enableControllerAttachDetach {
        if node.Annotations == nil {
            node.Annotations = make(map[string]string)
        }

        klog.Infof("Setting node annotation to enable volume controller attach/detach")
        node.Annotations[volutil.ControllerManagedAttachAnnotation] = "true"
    } else {
        klog.Infof("Controller attach/detach is disabled for this node; Kubelet will attach and detach volumes")
    }

    if kl.keepTerminatedPodVolumes {
        if node.Annotations == nil {
            node.Annotations = make(map[string]string)
        }
        klog.Infof("Setting node annotation to keep pod volumes of terminated pods attached to the node")
        node.Annotations[volutil.KeepTerminatedPodVolumesAnnotation] = "true"
    }

    // @question: should this be placed after the call to the cloud provider? which also applies labels
    for k, v := range kl.nodeLabels {
        if cv, found := node.ObjectMeta.Labels[k]; found {
            klog.Warningf("the node label %s=%s will overwrite default setting %s", k, v, cv)
        }
        node.ObjectMeta.Labels[k] = v
    }

    if kl.providerID != "" {
        node.Spec.ProviderID = kl.providerID
    }

    if kl.cloud != nil {
        instances, ok := kl.cloud.Instances()
        if !ok {
            return nil, fmt.Errorf("failed to get instances from cloud provider")
        }

        // TODO: We can't assume that the node has credentials to talk to the
        // cloudprovider from arbitrary nodes. At most, we should talk to a
        // local metadata server here.
        var err error
        if node.Spec.ProviderID == "" {
            node.Spec.ProviderID, err = cloudprovider.GetInstanceProviderID(context.TODO(), kl.cloud, kl.nodeName)
            if err != nil {
                return nil, err
            }
        }

        instanceType, err := instances.InstanceType(context.TODO(), kl.nodeName)
        if err != nil {
            return nil, err
        }
        if instanceType != "" {
            klog.Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceType, instanceType)
            node.ObjectMeta.Labels[v1.LabelInstanceType] = instanceType
        }

        // If the cloud has zone information, label the node with the zone information
        zones, ok := kl.cloud.Zones()
        if ok {
            zone, err := zones.GetZone(context.TODO())
            if err != nil {
                return nil, fmt.Errorf("failed to get zone from cloud provider: %v", err)
            }
            if zone.FailureDomain != "" {
                klog.Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomain, zone.FailureDomain)
                node.ObjectMeta.Labels[v1.LabelZoneFailureDomain] = zone.FailureDomain
            }
            if zone.Region != "" {
                klog.Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegion, zone.Region)
                node.ObjectMeta.Labels[v1.LabelZoneRegion] = zone.Region
            }
        }
    }

    kl.setNodeStatus(node)

    return node, nil
}

// syncNodeStatus should be called periodically from a goroutine.
// It synchronizes node status to master if there is any change or enough time
// passed from the last sync, registering the kubelet first if necessary.
func (kl *Kubelet) syncNodeStatus() {
    kl.syncNodeStatusMux.Lock()
    defer kl.syncNodeStatusMux.Unlock()

    if kl.kubeClient == nil || kl.heartbeatClient == nil {
        return
    }
    if kl.registerNode {
        // This will exit immediately if it doesn't need to do anything.
        kl.registerWithAPIServer()
    }
    if err := kl.updateNodeStatus(); err != nil {
        klog.Errorf("Unable to update node status: %v", err)
    }
}

// updateNodeStatus updates node status to master with retries if there is any
// change or enough time passed from the last sync.
func (kl *Kubelet) updateNodeStatus() error {
    klog.V(5).Infof("Updating node status")
    for i := 0; i < nodeStatusUpdateRetry; i++ {
        if err := kl.tryUpdateNodeStatus(i); err != nil {
            if i > 0 && kl.onRepeatedHeartbeatFailure != nil {
                kl.onRepeatedHeartbeatFailure()
            }
            klog.Errorf("Error updating node status, will retry: %v", err)
        } else {
            return nil
        }
    }
    return fmt.Errorf("update node status exceeds retry count")
}

// tryUpdateNodeStatus tries to update node status to master if there is any
// change or enough time passed from the last sync.
func (kl *Kubelet) tryUpdateNodeStatus(tryNumber int) error {
    // In large clusters, GET and PUT operations on Node objects coming
    // from here are the majority of load on apiserver and etcd.
    // To reduce the load on etcd, we are serving GET operations from
    // apiserver cache (the data might be slightly delayed but it doesn't
    // seem to cause more conflict - the delays are pretty small).
    // If it results in a conflict, all retries are served directly from etcd.
    opts := metav1.GetOptions{}
    if tryNumber == 0 {
        util.FromApiserverCache(&opts)
    }
    node, err := kl.heartbeatClient.CoreV1().Nodes().Get(string(kl.nodeName), opts)
    if err != nil {
        return fmt.Errorf("error getting node %q: %v", kl.nodeName, err)
    }

    originalNode := node.DeepCopy()
    if originalNode == nil {
        return fmt.Errorf("nil %q node object", kl.nodeName)
    }

    podCIDRChanged := false
    if node.Spec.PodCIDR != "" {
        // Pod CIDR could have been updated before, so we cannot rely on
        // node.Spec.PodCIDR being non-empty. We also need to know if pod CIDR is
        // actually changed.
        if podCIDRChanged, err = kl.updatePodCIDR(node.Spec.PodCIDR); err != nil {
            klog.Errorf(err.Error())
        }
    }

    kl.setNodeStatus(node)

    now := kl.clock.Now()
    if utilfeature.DefaultFeatureGate.Enabled(features.NodeLease) && now.Before(kl.lastStatusReportTime.Add(kl.nodeStatusReportFrequency)) {
        if !podCIDRChanged && !nodeStatusHasChanged(&originalNode.Status, &node.Status) {
            // We must mark the volumes as ReportedInUse in volume manager's dsw even
            // if no changes were made to the node status (no volumes were added or removed
            // from the VolumesInUse list).
            //
            // The reason is that on a kubelet restart, the volume manager's dsw is
            // repopulated and the volume ReportedInUse is initialized to false, while the
            // VolumesInUse list from the Node object still contains the state from the
            // previous kubelet instantiation.
            //
            // Once the volumes are added to the dsw, the ReportedInUse field needs to be
            // synced from the VolumesInUse list in the Node.Status.
            //
            // The MarkVolumesAsReportedInUse() call cannot be performed in dsw directly
            // because it does not have access to the Node object.
            // This also cannot be populated on node status manager init because the volume
            // may not have been added to dsw at that time.
            kl.volumeManager.MarkVolumesAsReportedInUse(node.Status.VolumesInUse)
            return nil
        }
    }

    // Patch the current status on the API server
    updatedNode, _, err := nodeutil.PatchNodeStatus(kl.heartbeatClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, node)
    if err != nil {
        return err
    }
    kl.lastStatusReportTime = now
    kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses)
    // If the update finishes successfully, mark the volumes in use as reported in use to
    // indicate those volumes are already updated in the node's status.
    kl.volumeManager.MarkVolumesAsReportedInUse(updatedNode.Status.VolumesInUse)
    return nil
}

// recordNodeStatusEvent records an event of the given type with the given
// message for the node.
func (kl *Kubelet) recordNodeStatusEvent(eventType, event string) {
    klog.V(2).Infof("Recording %s event message for node %s", event, kl.nodeName)
    // TODO: This requires a transaction, either both node status is updated
    // and event is recorded or neither should happen, see issue #6055.
    kl.recorder.Eventf(kl.nodeRef, eventType, event, "Node %s status is now: %s", kl.nodeName, event)
}

// recordEvent records an event for this node, the Kubelet's nodeRef is passed to the recorder
func (kl *Kubelet) recordEvent(eventType, event, message string) {
    kl.recorder.Eventf(kl.nodeRef, eventType, event, message)
}

// recordNodeSchedulableEvent records an event whenever the node's schedulable state changes.
func (kl *Kubelet) recordNodeSchedulableEvent(node *v1.Node) error {
    kl.lastNodeUnschedulableLock.Lock()
    defer kl.lastNodeUnschedulableLock.Unlock()
    if kl.lastNodeUnschedulable != node.Spec.Unschedulable {
        if node.Spec.Unschedulable {
            kl.recordNodeStatusEvent(v1.EventTypeNormal, events.NodeNotSchedulable)
        } else {
            kl.recordNodeStatusEvent(v1.EventTypeNormal, events.NodeSchedulable)
        }
        kl.lastNodeUnschedulable = node.Spec.Unschedulable
    }
    return nil
}

// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
// TODO(madhusudancs): Simplify the logic for setting node conditions and
// refactor the node status condition code out to a different file.
func (kl *Kubelet) setNodeStatus(node *v1.Node) {
    for i, f := range kl.setNodeStatusFuncs {
        klog.V(5).Infof("Setting node status at position %v", i)
        if err := f(node); err != nil {
            klog.Warningf("Failed to set some node status fields: %s", err)
        }
    }
}

func (kl *Kubelet) setLastObservedNodeAddresses(addresses []v1.NodeAddress) {
    kl.lastObservedNodeAddressesMux.Lock()
    defer kl.lastObservedNodeAddressesMux.Unlock()
    kl.lastObservedNodeAddresses = addresses
}

func (kl *Kubelet) getLastObservedNodeAddresses() []v1.NodeAddress {
    kl.lastObservedNodeAddressesMux.RLock()
    defer kl.lastObservedNodeAddressesMux.RUnlock()
    return kl.lastObservedNodeAddresses
}

// defaultNodeStatusFuncs is a factory that generates the default set of
// setNodeStatus funcs
func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
    // if cloud is not nil, we expect the cloud resource sync manager to exist
    var nodeAddressesFunc func() ([]v1.NodeAddress, error)
    if kl.cloud != nil {
        nodeAddressesFunc = kl.cloudResourceSyncManager.NodeAddresses
    }
    var validateHostFunc func() error
    if kl.appArmorValidator != nil {
        validateHostFunc = kl.appArmorValidator.ValidateHost
    }
    var setters []func(n *v1.Node) error
    setters = append(setters,
        nodestatus.NodeAddress(kl.nodeIP, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc),
        nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity,
            kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent),
        nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version),
        nodestatus.DaemonEndpoints(kl.daemonEndpoints),
        nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),
        nodestatus.GoRuntime(),
    )
    if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
        setters = append(setters, nodestatus.VolumeLimits(kl.volumePluginMgr.ListVolumePluginWithLimits))
    }
    setters = append(setters,
        nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent),
        nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent),
        nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent),
        nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, validateHostFunc, kl.containerManager.Status, kl.recordNodeStatusEvent),
        nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse),
        nodestatus.RemoveOutOfDiskCondition(),
        // TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event
        // and record state back to the Kubelet runtime object. In the future, I'd like to isolate
        // these side-effects by decoupling the decisions to send events and partial status recording
        // from the Node setters.
        kl.recordNodeSchedulableEvent,
    )
    return setters
}

// Validate given node IP belongs to the current host
func validateNodeIP(nodeIP net.IP) error {
    // Honor IP limitations set in setNodeStatus()
    if nodeIP.To4() == nil && nodeIP.To16() == nil {
        return fmt.Errorf("nodeIP must be a valid IP address")
    }
    if nodeIP.IsLoopback() {
        return fmt.Errorf("nodeIP can't be loopback address")
    }
    if nodeIP.IsMulticast() {
        return fmt.Errorf("nodeIP can't be a multicast address")
    }
    if nodeIP.IsLinkLocalUnicast() {
        return fmt.Errorf("nodeIP can't be a link-local unicast address")
    }
    if nodeIP.IsUnspecified() {
        return fmt.Errorf("nodeIP can't be an all zeros address")
    }

    addrs, err := net.InterfaceAddrs()
    if err != nil {
        return err
    }
    for _, addr := range addrs {
        var ip net.IP
        switch v := addr.(type) {
        case *net.IPNet:
            ip = v.IP
        case *net.IPAddr:
            ip = v.IP
        }
        if ip != nil && ip.Equal(nodeIP) {
            return nil
        }
    }
    return fmt.Errorf("Node IP: %q not found in the host's network interfaces", nodeIP.String())
}

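// Illustrative behaviour of the validation above (not part of the original
// file): special-purpose addresses are rejected outright, and any other
// address is accepted only if it is actually assigned to one of the host's
// interfaces. For example:
//
//	validateNodeIP(net.ParseIP("127.0.0.1")) // error: loopback address
//	validateNodeIP(net.ParseIP("0.0.0.0"))   // error: all zeros address
//	validateNodeIP(net.ParseIP("10.0.0.5"))  // nil only if 10.0.0.5 is configured on a local interface
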
// nodeStatusHasChanged compares the original node and current node's status and
// returns true if any change happens. The heartbeat timestamp is ignored.
func nodeStatusHasChanged(originalStatus *v1.NodeStatus, status *v1.NodeStatus) bool {
    if originalStatus == nil && status == nil {
        return false
    }
    if originalStatus == nil || status == nil {
        return true
    }

    // Compare node conditions here because we need to ignore the heartbeat timestamp.
    if nodeConditionsHaveChanged(originalStatus.Conditions, status.Conditions) {
        return true
    }

    // Compare other fields of NodeStatus.
    originalStatusCopy := originalStatus.DeepCopy()
    statusCopy := status.DeepCopy()
    originalStatusCopy.Conditions = nil
    statusCopy.Conditions = nil
    return !apiequality.Semantic.DeepEqual(originalStatusCopy, statusCopy)
}

// nodeConditionsHaveChanged compares the original node and current node's
// conditions and returns true if any change happens. The heartbeat timestamp is
// ignored.
func nodeConditionsHaveChanged(originalConditions []v1.NodeCondition, conditions []v1.NodeCondition) bool {
    if len(originalConditions) != len(conditions) {
        return true
    }

    originalConditionsCopy := make([]v1.NodeCondition, 0, len(originalConditions))
    originalConditionsCopy = append(originalConditionsCopy, originalConditions...)
    conditionsCopy := make([]v1.NodeCondition, 0, len(conditions))
    conditionsCopy = append(conditionsCopy, conditions...)

    sort.SliceStable(originalConditionsCopy, func(i, j int) bool { return originalConditionsCopy[i].Type < originalConditionsCopy[j].Type })
    sort.SliceStable(conditionsCopy, func(i, j int) bool { return conditionsCopy[i].Type < conditionsCopy[j].Type })

    replacedheartbeatTime := metav1.Time{}
    for i := range conditionsCopy {
        originalConditionsCopy[i].LastHeartbeatTime = replacedheartbeatTime
        conditionsCopy[i].LastHeartbeatTime = replacedheartbeatTime
        if !apiequality.Semantic.DeepEqual(&originalConditionsCopy[i], &conditionsCopy[i]) {
            return true
        }
    }
    return false
}
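
// A minimal sketch of the comparison semantics above (illustrative only, not
// part of the original file): two condition lists that differ solely in
// LastHeartbeatTime compare as unchanged, while any other difference, such as a
// Status flip, counts as a change.
//
//	a := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue, LastHeartbeatTime: metav1.Unix(1000, 0)}}
//	b := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue, LastHeartbeatTime: metav1.Unix(2000, 0)}}
//	nodeConditionsHaveChanged(a, b) // false: heartbeat timestamp is ignored
//	b[0].Status = v1.ConditionFalse
//	nodeConditionsHaveChanged(a, b) // true: the Ready condition's status changed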