setters.go
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodestatus

import (
	"fmt"
	"math"
	"net"
	goruntime "runtime"
	"strings"
	"time"

	cadvisorapiv1 "github.com/google/cadvisor/info/v1"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/errors"
	utilnet "k8s.io/apimachinery/pkg/util/net"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/klog"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/version"
	"k8s.io/kubernetes/pkg/volume"
)

const (
	// MaxNamesPerImageInNodeStatus is max number of names
	// per image stored in the node status.
	MaxNamesPerImageInNodeStatus = 5
)

// Setter modifies the node in-place, and returns an error if the modification failed.
// Setters may partially mutate the node before returning an error.
type Setter func(node *v1.Node) error
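
// A Setter does not post the status itself; callers compose setters and apply
// them to a node object when building a status update. A minimal, illustrative
// sketch from a caller's point of view (the variable names are hypothetical,
// not part of this package's API):
//
//	setters := []nodestatus.Setter{
//		nodestatus.GoRuntime(),
//		nodestatus.DaemonEndpoints(daemonEndpoints),
//	}
//	for _, setter := range setters {
//		if err := setter(node); err != nil {
//			klog.Errorf("failed to set node status: %v", err)
//		}
//	}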

// NodeAddress returns a Setter that updates address-related information on the node.
func NodeAddress(nodeIP net.IP, // typically Kubelet.nodeIP
	validateNodeIPFunc func(net.IP) error, // typically Kubelet.nodeIPValidator
	hostname string, // typically Kubelet.hostname
	hostnameOverridden bool, // was the hostname force set?
	externalCloudProvider bool, // typically Kubelet.externalCloudProvider
	cloud cloudprovider.Interface, // typically Kubelet.cloud
	nodeAddressesFunc func() ([]v1.NodeAddress, error), // typically Kubelet.cloudResourceSyncManager.NodeAddresses
) Setter {
	return func(node *v1.Node) error {
		if nodeIP != nil {
			if err := validateNodeIPFunc(nodeIP); err != nil {
				return fmt.Errorf("failed to validate nodeIP: %v", err)
			}
			klog.V(2).Infof("Using node IP: %q", nodeIP.String())
		}

		if externalCloudProvider {
			if nodeIP != nil {
				if node.ObjectMeta.Annotations == nil {
					node.ObjectMeta.Annotations = make(map[string]string)
				}
				node.ObjectMeta.Annotations[kubeletapis.AnnotationProvidedIPAddr] = nodeIP.String()
			}
			// We rely on the external cloud provider to supply the addresses.
			return nil
		}
		if cloud != nil {
			nodeAddresses, err := nodeAddressesFunc()
			if err != nil {
				return err
			}
			if nodeIP != nil {
				enforcedNodeAddresses := []v1.NodeAddress{}

				nodeIPTypes := make(map[v1.NodeAddressType]bool)
				for _, nodeAddress := range nodeAddresses {
					if nodeAddress.Address == nodeIP.String() {
						enforcedNodeAddresses = append(enforcedNodeAddresses, v1.NodeAddress{Type: nodeAddress.Type, Address: nodeAddress.Address})
						nodeIPTypes[nodeAddress.Type] = true
					}
				}
				if len(enforcedNodeAddresses) > 0 {
					for _, nodeAddress := range nodeAddresses {
						if !nodeIPTypes[nodeAddress.Type] && nodeAddress.Type != v1.NodeHostName {
							enforcedNodeAddresses = append(enforcedNodeAddresses, v1.NodeAddress{Type: nodeAddress.Type, Address: nodeAddress.Address})
						}
					}

					enforcedNodeAddresses = append(enforcedNodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
					node.Status.Addresses = enforcedNodeAddresses
					return nil
				}
				return fmt.Errorf("failed to get node address from cloud provider that matches ip: %v", nodeIP)
			}

			switch {
			case len(nodeAddresses) == 0:
				// the cloud provider didn't specify any addresses
				nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
			case !hasAddressType(nodeAddresses, v1.NodeHostName) && hasAddressValue(nodeAddresses, hostname):
				// the cloud provider didn't specify an address of type Hostname,
				// but the auto-detected hostname matched an address reported by the cloud provider,
				// so we can add it and count on the value being verifiable via cloud provider metadata
				nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
			case hostnameOverridden:
				// the hostname was force-set via flag/config.
				// this means the hostname might not be able to be validated via cloud provider metadata,
				// but was a choice by the kubelet deployer we should honor
				var existingHostnameAddress *v1.NodeAddress
				for i := range nodeAddresses {
					if nodeAddresses[i].Type == v1.NodeHostName {
						existingHostnameAddress = &nodeAddresses[i]
						break
					}
				}

				if existingHostnameAddress == nil {
					// no existing Hostname address found, add it
					klog.Warningf("adding overridden hostname of %v to cloudprovider-reported addresses", hostname)
					nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
				} else if existingHostnameAddress.Address != hostname {
					// override the Hostname address reported by the cloud provider
					klog.Warningf("replacing cloudprovider-reported hostname of %v with overridden hostname of %v", existingHostnameAddress.Address, hostname)
					existingHostnameAddress.Address = hostname
				}
			}

			node.Status.Addresses = nodeAddresses
		} else {
			var ipAddr net.IP
			var err error

			// 1) Use nodeIP if set
			// 2) If the user has specified an IP to HostnameOverride, use it
			// 3) Lookup the IP from node name by DNS and use the first valid IPv4 address.
			//    If the node does not have a valid IPv4 address, use the first valid IPv6 address.
			// 4) Try to get the IP from the network interface used as default gateway
			if nodeIP != nil {
				ipAddr = nodeIP
			} else if addr := net.ParseIP(hostname); addr != nil {
				ipAddr = addr
			} else {
				var addrs []net.IP
				addrs, _ = net.LookupIP(node.Name)
				for _, addr := range addrs {
					if err = validateNodeIPFunc(addr); err == nil {
						if addr.To4() != nil {
							ipAddr = addr
							break
						}
						if addr.To16() != nil && ipAddr == nil {
							ipAddr = addr
						}
					}
				}

				if ipAddr == nil {
					ipAddr, err = utilnet.ChooseHostInterface()
				}
			}

			if ipAddr == nil {
				// We tried everything we could, but the IP address wasn't fetchable; error out
				return fmt.Errorf("can't get ip address of node %s. error: %v", node.Name, err)
			}
			node.Status.Addresses = []v1.NodeAddress{
				{Type: v1.NodeInternalIP, Address: ipAddr.String()},
				{Type: v1.NodeHostName, Address: hostname},
			}
		}
		return nil
	}
}
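
// hasAddressType returns true if the given address slice contains an address of the given type.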
func hasAddressType(addresses []v1.NodeAddress, addressType v1.NodeAddressType) bool {
	for _, address := range addresses {
		if address.Type == addressType {
			return true
		}
	}
	return false
}
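
// hasAddressValue returns true if the given address slice contains an address with the given value.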
func hasAddressValue(addresses []v1.NodeAddress, addressValue string) bool {
	for _, address := range addresses {
		if address.Address == addressValue {
			return true
		}
	}
	return false
}

// MachineInfo returns a Setter that updates machine-related information on the node.
func MachineInfo(nodeName string,
	maxPods int,
	podsPerCore int,
	machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo
	capacityFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
	devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity
	nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation
	recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent
) Setter {
	return func(node *v1.Node) error {
		// Note: avoid blindly overwriting the capacity in case opaque
		// resources are being advertised.
		if node.Status.Capacity == nil {
			node.Status.Capacity = v1.ResourceList{}
		}

		var devicePluginAllocatable v1.ResourceList
		var devicePluginCapacity v1.ResourceList
		var removedDevicePlugins []string

		// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
		// cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
		info, err := machineInfoFunc()
		if err != nil {
			// TODO(roberthbailey): This is required for test-cmd.sh to pass.
			// See if the test should be updated instead.
			node.Status.Capacity[v1.ResourceCPU] = *resource.NewMilliQuantity(0, resource.DecimalSI)
			node.Status.Capacity[v1.ResourceMemory] = resource.MustParse("0Gi")
			node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(int64(maxPods), resource.DecimalSI)
			klog.Errorf("Error getting machine info: %v", err)
		} else {
			node.Status.NodeInfo.MachineID = info.MachineID
			node.Status.NodeInfo.SystemUUID = info.SystemUUID

			for rName, rCap := range cadvisor.CapacityFromMachineInfo(info) {
				node.Status.Capacity[rName] = rCap
			}

			if podsPerCore > 0 {
				node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
					int64(math.Min(float64(info.NumCores*podsPerCore), float64(maxPods))), resource.DecimalSI)
			} else {
				node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
					int64(maxPods), resource.DecimalSI)
			}

			if node.Status.NodeInfo.BootID != "" &&
				node.Status.NodeInfo.BootID != info.BootID {
				// TODO: This requires a transaction, either both node status is updated
				// and event is recorded or neither should happen, see issue #6055.
				recordEventFunc(v1.EventTypeWarning, events.NodeRebooted,
					fmt.Sprintf("Node %s has been rebooted, boot id: %s", nodeName, info.BootID))
			}
			node.Status.NodeInfo.BootID = info.BootID

			if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
				// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
				// capacity for every node status request
				initialCapacity := capacityFunc()
				if initialCapacity != nil {
					if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
						node.Status.Capacity[v1.ResourceEphemeralStorage] = v
					}
				}
			}

			devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc()
			if devicePluginCapacity != nil {
				for k, v := range devicePluginCapacity {
					if old, ok := node.Status.Capacity[k]; !ok || old.Value() != v.Value() {
						klog.V(2).Infof("Update capacity for %s to %d", k, v.Value())
					}
					node.Status.Capacity[k] = v
				}
			}

			for _, removedResource := range removedDevicePlugins {
				klog.V(2).Infof("Set capacity for %s to 0 on device removal", removedResource)
				// Set the capacity of the removed resource to 0 instead of
				// removing the resource from the node status. This is to indicate
				// that the resource is managed by device plugin and had been
				// registered before.
				//
				// This is required to differentiate the device plugin managed
				// resources and the cluster-level resources, which are absent in
				// node status.
				node.Status.Capacity[v1.ResourceName(removedResource)] = *resource.NewQuantity(int64(0), resource.DecimalSI)
			}
		}

		// Set Allocatable.
		if node.Status.Allocatable == nil {
			node.Status.Allocatable = make(v1.ResourceList)
		}
		// Remove extended resources from allocatable that are no longer
		// present in capacity.
		for k := range node.Status.Allocatable {
			_, found := node.Status.Capacity[k]
			if !found && v1helper.IsExtendedResourceName(k) {
				delete(node.Status.Allocatable, k)
			}
		}
		allocatableReservation := nodeAllocatableReservationFunc()
		for k, v := range node.Status.Capacity {
			value := *(v.Copy())
			if res, exists := allocatableReservation[k]; exists {
				value.Sub(res)
			}
			if value.Sign() < 0 {
				// Negative Allocatable resources don't make sense.
				value.Set(0)
			}
			node.Status.Allocatable[k] = value
		}

		if devicePluginAllocatable != nil {
			for k, v := range devicePluginAllocatable {
				if old, ok := node.Status.Allocatable[k]; !ok || old.Value() != v.Value() {
					klog.V(2).Infof("Update allocatable for %s to %d", k, v.Value())
				}
				node.Status.Allocatable[k] = v
			}
		}
		// for every huge page reservation, we need to remove it from allocatable memory
		for k, v := range node.Status.Capacity {
			if v1helper.IsHugePageResourceName(k) {
				allocatableMemory := node.Status.Allocatable[v1.ResourceMemory]
				value := *(v.Copy())
				allocatableMemory.Sub(value)
				if allocatableMemory.Sign() < 0 {
					// Negative Allocatable resources don't make sense.
					allocatableMemory.Set(0)
				}
				node.Status.Allocatable[v1.ResourceMemory] = allocatableMemory
			}
		}
		return nil
	}
}

// VersionInfo returns a Setter that updates version-related information on the node.
func VersionInfo(versionInfoFunc func() (*cadvisorapiv1.VersionInfo, error), // typically Kubelet.cadvisor.VersionInfo
	runtimeTypeFunc func() string, // typically Kubelet.containerRuntime.Type
	runtimeVersionFunc func() (kubecontainer.Version, error), // typically Kubelet.containerRuntime.Version
) Setter {
	return func(node *v1.Node) error {
		verinfo, err := versionInfoFunc()
		if err != nil {
			// TODO(mtaufen): consider removing this log line, since returned error will be logged
			klog.Errorf("Error getting version info: %v", err)
			return fmt.Errorf("error getting version info: %v", err)
		}

		node.Status.NodeInfo.KernelVersion = verinfo.KernelVersion
		node.Status.NodeInfo.OSImage = verinfo.ContainerOsVersion

		runtimeVersion := "Unknown"
		if runtimeVer, err := runtimeVersionFunc(); err == nil {
			runtimeVersion = runtimeVer.String()
		}
		node.Status.NodeInfo.ContainerRuntimeVersion = fmt.Sprintf("%s://%s", runtimeTypeFunc(), runtimeVersion)

		node.Status.NodeInfo.KubeletVersion = version.Get().String()
		// TODO: kube-proxy might be different version from kubelet in the future
		node.Status.NodeInfo.KubeProxyVersion = version.Get().String()
		return nil
	}
}

// DaemonEndpoints returns a Setter that updates the daemon endpoints on the node.
func DaemonEndpoints(daemonEndpoints *v1.NodeDaemonEndpoints) Setter {
	return func(node *v1.Node) error {
		node.Status.DaemonEndpoints = *daemonEndpoints
		return nil
	}
}

// Images returns a Setter that updates the images on the node.
// imageListFunc is expected to return a list of images sorted in descending order by image size.
// nodeStatusMaxImages is ignored if set to -1.
func Images(nodeStatusMaxImages int32,
	imageListFunc func() ([]kubecontainer.Image, error), // typically Kubelet.imageManager.GetImageList
) Setter {
	return func(node *v1.Node) error {
		// Update image list of this node
		var imagesOnNode []v1.ContainerImage
		containerImages, err := imageListFunc()
		if err != nil {
			// TODO(mtaufen): consider removing this log line, since returned error will be logged
			klog.Errorf("Error getting image list: %v", err)
			node.Status.Images = imagesOnNode
			return fmt.Errorf("error getting image list: %v", err)
		}
		// we expect imageListFunc to return a sorted list, so we just need to truncate
		if int(nodeStatusMaxImages) > -1 &&
			int(nodeStatusMaxImages) < len(containerImages) {
			containerImages = containerImages[0:nodeStatusMaxImages]
		}

		for _, image := range containerImages {
			names := append(image.RepoDigests, image.RepoTags...)
			// Report up to MaxNamesPerImageInNodeStatus names per image.
			if len(names) > MaxNamesPerImageInNodeStatus {
				names = names[0:MaxNamesPerImageInNodeStatus]
			}
			imagesOnNode = append(imagesOnNode, v1.ContainerImage{
				Names:     names,
				SizeBytes: image.Size,
			})
		}

		node.Status.Images = imagesOnNode
		return nil
	}
}

// GoRuntime returns a Setter that sets GOOS and GOARCH on the node.
func GoRuntime() Setter {
	return func(node *v1.Node) error {
		node.Status.NodeInfo.OperatingSystem = goruntime.GOOS
		node.Status.NodeInfo.Architecture = goruntime.GOARCH
		return nil
	}
}

// ReadyCondition returns a Setter that updates the v1.NodeReady condition on the node.
func ReadyCondition(
	nowFunc func() time.Time, // typically Kubelet.clock.Now
	runtimeErrorsFunc func() error, // typically Kubelet.runtimeState.runtimeErrors
	networkErrorsFunc func() error, // typically Kubelet.runtimeState.networkErrors
	storageErrorsFunc func() error, // typically Kubelet.runtimeState.storageErrors
	appArmorValidateHostFunc func() error, // typically Kubelet.appArmorValidator.ValidateHost, might be nil depending on whether there was an appArmorValidator
	cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status
	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
) Setter {
	return func(node *v1.Node) error {
		// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.
		// This is due to an issue with version skewed kubelet and master components.
		// ref: https://github.com/kubernetes/kubernetes/issues/16961
		currentTime := metav1.NewTime(nowFunc())
		newNodeReadyCondition := v1.NodeCondition{
			Type:              v1.NodeReady,
			Status:            v1.ConditionTrue,
			Reason:            "KubeletReady",
			Message:           "kubelet is posting ready status",
			LastHeartbeatTime: currentTime,
		}
		errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc()}
		requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
		if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
			requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
		}
		missingCapacities := []string{}
		for _, resource := range requiredCapacities {
			if _, found := node.Status.Capacity[resource]; !found {
				missingCapacities = append(missingCapacities, string(resource))
			}
		}
		if len(missingCapacities) > 0 {
			errs = append(errs, fmt.Errorf("Missing node capacity for resources: %s", strings.Join(missingCapacities, ", ")))
		}
		if aggregatedErr := errors.NewAggregate(errs); aggregatedErr != nil {
			newNodeReadyCondition = v1.NodeCondition{
				Type:              v1.NodeReady,
				Status:            v1.ConditionFalse,
				Reason:            "KubeletNotReady",
				Message:           aggregatedErr.Error(),
				LastHeartbeatTime: currentTime,
			}
		}
		// Append AppArmor status if it's enabled.
		// TODO(tallclair): This is a temporary message until node feature reporting is added.
		if appArmorValidateHostFunc != nil && newNodeReadyCondition.Status == v1.ConditionTrue {
			if err := appArmorValidateHostFunc(); err == nil {
				newNodeReadyCondition.Message = fmt.Sprintf("%s. AppArmor enabled", newNodeReadyCondition.Message)
			}
		}

		// Record any soft requirements that were not met in the container manager.
		status := cmStatusFunc()
		if status.SoftRequirements != nil {
			newNodeReadyCondition.Message = fmt.Sprintf("%s. WARNING: %s", newNodeReadyCondition.Message, status.SoftRequirements.Error())
		}

		readyConditionUpdated := false
		needToRecordEvent := false
		for i := range node.Status.Conditions {
			if node.Status.Conditions[i].Type == v1.NodeReady {
				if node.Status.Conditions[i].Status == newNodeReadyCondition.Status {
					newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
				} else {
					newNodeReadyCondition.LastTransitionTime = currentTime
					needToRecordEvent = true
				}
				node.Status.Conditions[i] = newNodeReadyCondition
				readyConditionUpdated = true
				break
			}
		}
		if !readyConditionUpdated {
			newNodeReadyCondition.LastTransitionTime = currentTime
			node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition)
		}
		if needToRecordEvent {
			if newNodeReadyCondition.Status == v1.ConditionTrue {
				recordEventFunc(v1.EventTypeNormal, events.NodeReady)
			} else {
				recordEventFunc(v1.EventTypeNormal, events.NodeNotReady)
				klog.Infof("Node became not ready: %+v", newNodeReadyCondition)
			}
		}
		return nil
	}
}

// MemoryPressureCondition returns a Setter that updates the v1.NodeMemoryPressure condition on the node.
func MemoryPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderMemoryPressure
	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
) Setter {
	return func(node *v1.Node) error {
		currentTime := metav1.NewTime(nowFunc())
		var condition *v1.NodeCondition

		// Check if NodeMemoryPressure condition already exists and if it does, just pick it up for update.
		for i := range node.Status.Conditions {
			if node.Status.Conditions[i].Type == v1.NodeMemoryPressure {
				condition = &node.Status.Conditions[i]
			}
		}

		newCondition := false
		// If the NodeMemoryPressure condition doesn't exist, create one
		if condition == nil {
			condition = &v1.NodeCondition{
				Type:   v1.NodeMemoryPressure,
				Status: v1.ConditionUnknown,
			}
			// cannot be appended to node.Status.Conditions here because it gets
			// copied to the slice. So if we append to the slice here none of the
			// updates we make below are reflected in the slice.
			newCondition = true
		}

		// Update the heartbeat time
		condition.LastHeartbeatTime = currentTime

		// Note: The conditions below take care of the case when a new NodeMemoryPressure condition is
		// created and as well as the case when the condition already exists. When a new condition
		// is created its status is set to v1.ConditionUnknown which matches either
		// condition.Status != v1.ConditionTrue or
		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
		// the kubelet is under memory pressure or not.
		if pressureFunc() {
			if condition.Status != v1.ConditionTrue {
				condition.Status = v1.ConditionTrue
				condition.Reason = "KubeletHasInsufficientMemory"
				condition.Message = "kubelet has insufficient memory available"
				condition.LastTransitionTime = currentTime
				recordEventFunc(v1.EventTypeNormal, "NodeHasInsufficientMemory")
			}
		} else if condition.Status != v1.ConditionFalse {
			condition.Status = v1.ConditionFalse
			condition.Reason = "KubeletHasSufficientMemory"
			condition.Message = "kubelet has sufficient memory available"
			condition.LastTransitionTime = currentTime
			recordEventFunc(v1.EventTypeNormal, "NodeHasSufficientMemory")
		}

		if newCondition {
			node.Status.Conditions = append(node.Status.Conditions, *condition)
		}
		return nil
	}
}

// PIDPressureCondition returns a Setter that updates the v1.NodePIDPressure condition on the node.
func PIDPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderPIDPressure
	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
) Setter {
	return func(node *v1.Node) error {
		currentTime := metav1.NewTime(nowFunc())
		var condition *v1.NodeCondition

		// Check if NodePIDPressure condition already exists and if it does, just pick it up for update.
		for i := range node.Status.Conditions {
			if node.Status.Conditions[i].Type == v1.NodePIDPressure {
				condition = &node.Status.Conditions[i]
			}
		}

		newCondition := false
		// If the NodePIDPressure condition doesn't exist, create one
		if condition == nil {
			condition = &v1.NodeCondition{
				Type:   v1.NodePIDPressure,
				Status: v1.ConditionUnknown,
			}
			// cannot be appended to node.Status.Conditions here because it gets
			// copied to the slice. So if we append to the slice here none of the
			// updates we make below are reflected in the slice.
			newCondition = true
		}

		// Update the heartbeat time
		condition.LastHeartbeatTime = currentTime

		// Note: The conditions below take care of the case when a new NodePIDPressure condition is
		// created and as well as the case when the condition already exists. When a new condition
		// is created its status is set to v1.ConditionUnknown which matches either
		// condition.Status != v1.ConditionTrue or
		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
		// the kubelet is under PID pressure or not.
		if pressureFunc() {
			if condition.Status != v1.ConditionTrue {
				condition.Status = v1.ConditionTrue
				condition.Reason = "KubeletHasInsufficientPID"
				condition.Message = "kubelet has insufficient PID available"
				condition.LastTransitionTime = currentTime
				recordEventFunc(v1.EventTypeNormal, "NodeHasInsufficientPID")
			}
		} else if condition.Status != v1.ConditionFalse {
			condition.Status = v1.ConditionFalse
			condition.Reason = "KubeletHasSufficientPID"
			condition.Message = "kubelet has sufficient PID available"
			condition.LastTransitionTime = currentTime
			recordEventFunc(v1.EventTypeNormal, "NodeHasSufficientPID")
		}

		if newCondition {
			node.Status.Conditions = append(node.Status.Conditions, *condition)
		}
		return nil
	}
}

// DiskPressureCondition returns a Setter that updates the v1.NodeDiskPressure condition on the node.
func DiskPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderDiskPressure
	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
) Setter {
	return func(node *v1.Node) error {
		currentTime := metav1.NewTime(nowFunc())
		var condition *v1.NodeCondition

		// Check if NodeDiskPressure condition already exists and if it does, just pick it up for update.
		for i := range node.Status.Conditions {
			if node.Status.Conditions[i].Type == v1.NodeDiskPressure {
				condition = &node.Status.Conditions[i]
			}
		}

		newCondition := false
		// If the NodeDiskPressure condition doesn't exist, create one
		if condition == nil {
			condition = &v1.NodeCondition{
				Type:   v1.NodeDiskPressure,
				Status: v1.ConditionUnknown,
			}
			// cannot be appended to node.Status.Conditions here because it gets
			// copied to the slice. So if we append to the slice here none of the
			// updates we make below are reflected in the slice.
			newCondition = true
		}

		// Update the heartbeat time
		condition.LastHeartbeatTime = currentTime

		// Note: The conditions below take care of the case when a new NodeDiskPressure condition is
		// created and as well as the case when the condition already exists. When a new condition
		// is created its status is set to v1.ConditionUnknown which matches either
		// condition.Status != v1.ConditionTrue or
		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
		// the kubelet is under disk pressure or not.
		if pressureFunc() {
			if condition.Status != v1.ConditionTrue {
				condition.Status = v1.ConditionTrue
				condition.Reason = "KubeletHasDiskPressure"
				condition.Message = "kubelet has disk pressure"
				condition.LastTransitionTime = currentTime
				recordEventFunc(v1.EventTypeNormal, "NodeHasDiskPressure")
			}
		} else if condition.Status != v1.ConditionFalse {
			condition.Status = v1.ConditionFalse
			condition.Reason = "KubeletHasNoDiskPressure"
			condition.Message = "kubelet has no disk pressure"
			condition.LastTransitionTime = currentTime
			recordEventFunc(v1.EventTypeNormal, "NodeHasNoDiskPressure")
		}

		if newCondition {
			node.Status.Conditions = append(node.Status.Conditions, *condition)
		}
		return nil
	}
}

// VolumesInUse returns a Setter that updates the volumes in use on the node.
func VolumesInUse(syncedFunc func() bool, // typically Kubelet.volumeManager.ReconcilerStatesHasBeenSynced
	volumesInUseFunc func() []v1.UniqueVolumeName, // typically Kubelet.volumeManager.GetVolumesInUse
) Setter {
	return func(node *v1.Node) error {
		// Make sure to only update node status after reconciler starts syncing up states
		if syncedFunc() {
			node.Status.VolumesInUse = volumesInUseFunc()
		}
		return nil
	}
}

// VolumeLimits returns a Setter that updates the volume limits on the node.
func VolumeLimits(volumePluginListFunc func() []volume.VolumePluginWithAttachLimits, // typically Kubelet.volumePluginMgr.ListVolumePluginWithLimits
) Setter {
	return func(node *v1.Node) error {
		if node.Status.Capacity == nil {
			node.Status.Capacity = v1.ResourceList{}
		}
		if node.Status.Allocatable == nil {
			node.Status.Allocatable = v1.ResourceList{}
		}

		pluginWithLimits := volumePluginListFunc()
		for _, volumePlugin := range pluginWithLimits {
			attachLimits, err := volumePlugin.GetVolumeLimits()
			if err != nil {
				klog.V(4).Infof("Error getting volume limit for plugin %s", volumePlugin.GetPluginName())
				continue
			}
			for limitKey, value := range attachLimits {
				node.Status.Capacity[v1.ResourceName(limitKey)] = *resource.NewQuantity(value, resource.DecimalSI)
				node.Status.Allocatable[v1.ResourceName(limitKey)] = *resource.NewQuantity(value, resource.DecimalSI)
			}
		}
		return nil
	}
}

// RemoveOutOfDiskCondition removes stale OutOfDisk condition
// OutOfDisk condition has been removed from kubelet in 1.12
func RemoveOutOfDiskCondition() Setter {
	return func(node *v1.Node) error {
		var conditions []v1.NodeCondition
		for i := range node.Status.Conditions {
			if node.Status.Conditions[i].Type != v1.NodeOutOfDisk {
				conditions = append(conditions, node.Status.Conditions[i])
			}
		}
		node.Status.Conditions = conditions
		return nil
	}
}
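
// A minimal sketch of exercising a single setter in isolation, e.g. in a test;
// the setter mutates the node in place and returns any error (illustrative only,
// not part of this package's API):
//
//	node := &v1.Node{}
//	if err := GoRuntime()(node); err != nil {
//		// handle error
//	}
//	// node.Status.NodeInfo.OperatingSystem and Architecture are now populated.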