resource.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. /*
  2. Copyright 2019 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package node
  14. import (
  15. "context"
  16. "fmt"
  17. "net"
  18. "strings"
  19. "time"
  20. v1 "k8s.io/api/core/v1"
  21. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  22. "k8s.io/apimachinery/pkg/util/rand"
  23. "k8s.io/apimachinery/pkg/util/sets"
  24. clientset "k8s.io/client-go/kubernetes"
  25. v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
  26. nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
  27. schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
  28. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  29. "k8s.io/kubernetes/test/e2e/system"
  30. )
  // poll is how often to poll pods, nodes and claims.
  const poll = 2 * time.Second

  // singleCallTimeout is how long to keep trying a single API call (such as
  // 'get' or 'list'). It exists so that transient failures do not fail tests.
  const singleCallTimeout = 5 * time.Minute

  // sshPort is the SSH port, kept as a string so it can be joined with a host.
  const sshPort = "22"
  // PodNode is a pod-node pair that records which node a given pod is
  // running on. Both fields hold names, not object references.
  type PodNode struct {
  	Pod  string // Pod is the name of the pod.
  	Node string // Node is the name of the node.
  }
  47. // FirstAddress returns the first address of the given type of each node.
  48. func FirstAddress(nodelist *v1.NodeList, addrType v1.NodeAddressType) string {
  49. for _, n := range nodelist.Items {
  50. for _, addr := range n.Status.Addresses {
  51. if addr.Type == addrType && addr.Address != "" {
  52. return addr.Address
  53. }
  54. }
  55. }
  56. return ""
  57. }
  58. func isNodeConditionSetAsExpected(node *v1.Node, conditionType v1.NodeConditionType, wantTrue, silent bool) bool {
  59. // Check the node readiness condition (logging all).
  60. for _, cond := range node.Status.Conditions {
  61. // Ensure that the condition type and the status matches as desired.
  62. if cond.Type == conditionType {
  63. // For NodeReady condition we need to check Taints as well
  64. if cond.Type == v1.NodeReady {
  65. hasNodeControllerTaints := false
  66. // For NodeReady we need to check if Taints are gone as well
  67. taints := node.Spec.Taints
  68. for _, taint := range taints {
  69. if taint.MatchTaint(nodectlr.UnreachableTaintTemplate) || taint.MatchTaint(nodectlr.NotReadyTaintTemplate) {
  70. hasNodeControllerTaints = true
  71. break
  72. }
  73. }
  74. if wantTrue {
  75. if (cond.Status == v1.ConditionTrue) && !hasNodeControllerTaints {
  76. return true
  77. }
  78. msg := ""
  79. if !hasNodeControllerTaints {
  80. msg = fmt.Sprintf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  81. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  82. } else {
  83. msg = fmt.Sprintf("Condition %s of node %s is %v, but Node is tainted by NodeController with %v. Failure",
  84. conditionType, node.Name, cond.Status == v1.ConditionTrue, taints)
  85. }
  86. if !silent {
  87. e2elog.Logf(msg)
  88. }
  89. return false
  90. }
  91. // TODO: check if the Node is tainted once we enable NC notReady/unreachable taints by default
  92. if cond.Status != v1.ConditionTrue {
  93. return true
  94. }
  95. if !silent {
  96. e2elog.Logf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  97. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  98. }
  99. return false
  100. }
  101. if (wantTrue && (cond.Status == v1.ConditionTrue)) || (!wantTrue && (cond.Status != v1.ConditionTrue)) {
  102. return true
  103. }
  104. if !silent {
  105. e2elog.Logf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  106. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  107. }
  108. return false
  109. }
  110. }
  111. if !silent {
  112. e2elog.Logf("Couldn't find condition %v on node %v", conditionType, node.Name)
  113. }
  114. return false
  115. }
  116. // IsConditionSetAsExpected returns a wantTrue value if the node has a match to the conditionType, otherwise returns an opposite value of the wantTrue with detailed logging.
  117. func IsConditionSetAsExpected(node *v1.Node, conditionType v1.NodeConditionType, wantTrue bool) bool {
  118. return isNodeConditionSetAsExpected(node, conditionType, wantTrue, false)
  119. }
  120. // IsConditionSetAsExpectedSilent returns a wantTrue value if the node has a match to the conditionType, otherwise returns an opposite value of the wantTrue.
  121. func IsConditionSetAsExpectedSilent(node *v1.Node, conditionType v1.NodeConditionType, wantTrue bool) bool {
  122. return isNodeConditionSetAsExpected(node, conditionType, wantTrue, true)
  123. }
  124. // isConditionUnset returns true if conditions of the given node do not have a match to the given conditionType, otherwise false.
  125. func isConditionUnset(node *v1.Node, conditionType v1.NodeConditionType) bool {
  126. for _, cond := range node.Status.Conditions {
  127. if cond.Type == conditionType {
  128. return false
  129. }
  130. }
  131. return true
  132. }
  133. // Filter filters nodes in NodeList in place, removing nodes that do not
  134. // satisfy the given condition
  135. func Filter(nodeList *v1.NodeList, fn func(node v1.Node) bool) {
  136. var l []v1.Node
  137. for _, node := range nodeList.Items {
  138. if fn(node) {
  139. l = append(l, node)
  140. }
  141. }
  142. nodeList.Items = l
  143. }
  144. // TotalRegistered returns number of registered Nodes excluding Master Node.
  145. func TotalRegistered(c clientset.Interface) (int, error) {
  146. nodes, err := waitListSchedulableNodes(c)
  147. if err != nil {
  148. e2elog.Logf("Failed to list nodes: %v", err)
  149. return 0, err
  150. }
  151. return len(nodes.Items), nil
  152. }
  153. // TotalReady returns number of ready Nodes excluding Master Node.
  154. func TotalReady(c clientset.Interface) (int, error) {
  155. nodes, err := waitListSchedulableNodes(c)
  156. if err != nil {
  157. e2elog.Logf("Failed to list nodes: %v", err)
  158. return 0, err
  159. }
  160. // Filter out not-ready nodes.
  161. Filter(nodes, func(node v1.Node) bool {
  162. return IsConditionSetAsExpected(&node, v1.NodeReady, true)
  163. })
  164. return len(nodes.Items), nil
  165. }
  166. // GetExternalIP returns node external IP concatenated with port 22 for ssh
  167. // e.g. 1.2.3.4:22
  168. func GetExternalIP(node *v1.Node) (string, error) {
  169. e2elog.Logf("Getting external IP address for %s", node.Name)
  170. host := ""
  171. for _, a := range node.Status.Addresses {
  172. if a.Type == v1.NodeExternalIP && a.Address != "" {
  173. host = net.JoinHostPort(a.Address, sshPort)
  174. break
  175. }
  176. }
  177. if host == "" {
  178. return "", fmt.Errorf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
  179. }
  180. return host, nil
  181. }
  182. // GetInternalIP returns node internal IP
  183. func GetInternalIP(node *v1.Node) (string, error) {
  184. host := ""
  185. for _, address := range node.Status.Addresses {
  186. if address.Type == v1.NodeInternalIP && address.Address != "" {
  187. host = net.JoinHostPort(address.Address, sshPort)
  188. break
  189. }
  190. }
  191. if host == "" {
  192. return "", fmt.Errorf("Couldn't get the internal IP of host %s with addresses %v", node.Name, node.Status.Addresses)
  193. }
  194. return host, nil
  195. }
  196. // GetAddresses returns a list of addresses of the given addressType for the given node
  197. func GetAddresses(node *v1.Node, addressType v1.NodeAddressType) (ips []string) {
  198. for j := range node.Status.Addresses {
  199. nodeAddress := &node.Status.Addresses[j]
  200. if nodeAddress.Type == addressType && nodeAddress.Address != "" {
  201. ips = append(ips, nodeAddress.Address)
  202. }
  203. }
  204. return
  205. }
  206. // CollectAddresses returns a list of addresses of the given addressType for the given list of nodes
  207. func CollectAddresses(nodes *v1.NodeList, addressType v1.NodeAddressType) []string {
  208. ips := []string{}
  209. for i := range nodes.Items {
  210. ips = append(ips, GetAddresses(&nodes.Items[i], addressType)...)
  211. }
  212. return ips
  213. }
  214. // PickIP picks one public node IP
  215. func PickIP(c clientset.Interface) (string, error) {
  216. publicIps, err := GetPublicIps(c)
  217. if err != nil {
  218. return "", fmt.Errorf("get node public IPs error: %s", err)
  219. }
  220. if len(publicIps) == 0 {
  221. return "", fmt.Errorf("got unexpected number (%d) of public IPs", len(publicIps))
  222. }
  223. ip := publicIps[0]
  224. return ip, nil
  225. }
  226. // GetPublicIps returns a public IP list of nodes.
  227. func GetPublicIps(c clientset.Interface) ([]string, error) {
  228. nodes, err := GetReadySchedulableNodes(c)
  229. if err != nil {
  230. return nil, fmt.Errorf("get schedulable and ready nodes error: %s", err)
  231. }
  232. ips := CollectAddresses(nodes, v1.NodeExternalIP)
  233. if len(ips) == 0 {
  234. // If ExternalIP isn't set, assume the test programs can reach the InternalIP
  235. ips = CollectAddresses(nodes, v1.NodeInternalIP)
  236. }
  237. return ips, nil
  238. }
  239. // GetReadySchedulableNodes addresses the common use case of getting nodes you can do work on.
  240. // 1) Needs to be schedulable.
  241. // 2) Needs to be ready.
  242. // If EITHER 1 or 2 is not true, most tests will want to ignore the node entirely.
  243. // If there are no nodes that are both ready and schedulable, this will return an error.
  244. func GetReadySchedulableNodes(c clientset.Interface) (nodes *v1.NodeList, err error) {
  245. nodes, err = checkWaitListSchedulableNodes(c)
  246. if err != nil {
  247. return nil, fmt.Errorf("listing schedulable nodes error: %s", err)
  248. }
  249. Filter(nodes, func(node v1.Node) bool {
  250. return IsNodeSchedulable(&node) && isNodeUntainted(&node)
  251. })
  252. if len(nodes.Items) == 0 {
  253. return nil, fmt.Errorf("there are currently no ready, schedulable nodes in the cluster")
  254. }
  255. return nodes, nil
  256. }
  257. // GetBoundedReadySchedulableNodes is like GetReadySchedulableNodes except that it returns
  258. // at most maxNodes nodes. Use this to keep your test case from blowing up when run on a
  259. // large cluster.
  260. func GetBoundedReadySchedulableNodes(c clientset.Interface, maxNodes int) (nodes *v1.NodeList, err error) {
  261. nodes, err = GetReadySchedulableNodes(c)
  262. if err != nil {
  263. return nil, err
  264. }
  265. if len(nodes.Items) > maxNodes {
  266. shuffled := make([]v1.Node, maxNodes)
  267. perm := rand.Perm(len(nodes.Items))
  268. for i, j := range perm {
  269. if j < len(shuffled) {
  270. shuffled[j] = nodes.Items[i]
  271. }
  272. }
  273. nodes.Items = shuffled
  274. }
  275. return nodes, nil
  276. }
  277. // GetRandomReadySchedulableNode gets a single randomly-selected node which is available for
  278. // running pods on. If there are no available nodes it will return an error.
  279. func GetRandomReadySchedulableNode(c clientset.Interface) (*v1.Node, error) {
  280. nodes, err := GetReadySchedulableNodes(c)
  281. if err != nil {
  282. return nil, err
  283. }
  284. return &nodes.Items[rand.Intn(len(nodes.Items))], nil
  285. }
  286. // GetReadyNodesIncludingTainted returns all ready nodes, even those which are tainted.
  287. // There are cases when we care about tainted nodes
  288. // E.g. in tests related to nodes with gpu we care about nodes despite
  289. // presence of nvidia.com/gpu=present:NoSchedule taint
  290. func GetReadyNodesIncludingTainted(c clientset.Interface) (nodes *v1.NodeList, err error) {
  291. nodes, err = checkWaitListSchedulableNodes(c)
  292. if err != nil {
  293. return nil, fmt.Errorf("listing schedulable nodes error: %s", err)
  294. }
  295. Filter(nodes, func(node v1.Node) bool {
  296. return IsNodeSchedulable(&node)
  297. })
  298. return nodes, nil
  299. }
  300. // GetMasterAndWorkerNodes will return a list masters and schedulable worker nodes
  301. func GetMasterAndWorkerNodes(c clientset.Interface) (sets.String, *v1.NodeList, error) {
  302. nodes := &v1.NodeList{}
  303. masters := sets.NewString()
  304. all, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
  305. if err != nil {
  306. return nil, nil, fmt.Errorf("get nodes error: %s", err)
  307. }
  308. for _, n := range all.Items {
  309. if system.DeprecatedMightBeMasterNode(n.Name) {
  310. masters.Insert(n.Name)
  311. } else if IsNodeSchedulable(&n) && isNodeUntainted(&n) {
  312. nodes.Items = append(nodes.Items, n)
  313. }
  314. }
  315. return masters, nodes, nil
  316. }
  317. // isNodeUntainted tests whether a fake pod can be scheduled on "node", given its current taints.
  318. // TODO: need to discuss wether to return bool and error type
  319. func isNodeUntainted(node *v1.Node) bool {
  320. return isNodeUntaintedWithNonblocking(node, "")
  321. }
  322. // isNodeUntaintedWithNonblocking tests whether a fake pod can be scheduled on "node"
  323. // but allows for taints in the list of non-blocking taints.
  324. func isNodeUntaintedWithNonblocking(node *v1.Node, nonblockingTaints string) bool {
  325. fakePod := &v1.Pod{
  326. TypeMeta: metav1.TypeMeta{
  327. Kind: "Pod",
  328. APIVersion: "v1",
  329. },
  330. ObjectMeta: metav1.ObjectMeta{
  331. Name: "fake-not-scheduled",
  332. Namespace: "fake-not-scheduled",
  333. },
  334. Spec: v1.PodSpec{
  335. Containers: []v1.Container{
  336. {
  337. Name: "fake-not-scheduled",
  338. Image: "fake-not-scheduled",
  339. },
  340. },
  341. },
  342. }
  343. nodeInfo := schedulernodeinfo.NewNodeInfo()
  344. // Simple lookup for nonblocking taints based on comma-delimited list.
  345. nonblockingTaintsMap := map[string]struct{}{}
  346. for _, t := range strings.Split(nonblockingTaints, ",") {
  347. if strings.TrimSpace(t) != "" {
  348. nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{}
  349. }
  350. }
  351. if len(nonblockingTaintsMap) > 0 {
  352. nodeCopy := node.DeepCopy()
  353. nodeCopy.Spec.Taints = []v1.Taint{}
  354. for _, v := range node.Spec.Taints {
  355. if _, isNonblockingTaint := nonblockingTaintsMap[v.Key]; !isNonblockingTaint {
  356. nodeCopy.Spec.Taints = append(nodeCopy.Spec.Taints, v)
  357. }
  358. }
  359. nodeInfo.SetNode(nodeCopy)
  360. } else {
  361. nodeInfo.SetNode(node)
  362. }
  363. taints, err := nodeInfo.Taints()
  364. if err != nil {
  365. e2elog.Failf("Can't test predicates for node %s: %v", node.Name, err)
  366. return false
  367. }
  368. return v1helper.TolerationsTolerateTaintsWithFilter(fakePod.Spec.Tolerations, taints, func(t *v1.Taint) bool {
  369. return t.Effect == v1.TaintEffectNoExecute || t.Effect == v1.TaintEffectNoSchedule
  370. })
  371. }
  372. // IsNodeSchedulable returns true if:
  373. // 1) doesn't have "unschedulable" field set
  374. // 2) it also returns true from IsNodeReady
  375. func IsNodeSchedulable(node *v1.Node) bool {
  376. if node == nil {
  377. return false
  378. }
  379. return !node.Spec.Unschedulable && IsNodeReady(node)
  380. }
  381. // IsNodeReady returns true if:
  382. // 1) it's Ready condition is set to true
  383. // 2) doesn't have NetworkUnavailable condition set to true
  384. func IsNodeReady(node *v1.Node) bool {
  385. nodeReady := IsConditionSetAsExpected(node, v1.NodeReady, true)
  386. networkReady := isConditionUnset(node, v1.NodeNetworkUnavailable) ||
  387. IsConditionSetAsExpectedSilent(node, v1.NodeNetworkUnavailable, false)
  388. return nodeReady && networkReady
  389. }
  390. // hasNonblockingTaint returns true if the node contains at least
  391. // one taint with a key matching the regexp.
  392. func hasNonblockingTaint(node *v1.Node, nonblockingTaints string) bool {
  393. if node == nil {
  394. return false
  395. }
  396. // Simple lookup for nonblocking taints based on comma-delimited list.
  397. nonblockingTaintsMap := map[string]struct{}{}
  398. for _, t := range strings.Split(nonblockingTaints, ",") {
  399. if strings.TrimSpace(t) != "" {
  400. nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{}
  401. }
  402. }
  403. for _, taint := range node.Spec.Taints {
  404. if _, hasNonblockingTaint := nonblockingTaintsMap[taint.Key]; hasNonblockingTaint {
  405. return true
  406. }
  407. }
  408. return false
  409. }
  410. // PodNodePairs return podNode pairs for all pods in a namespace
  411. func PodNodePairs(c clientset.Interface, ns string) ([]PodNode, error) {
  412. var result []PodNode
  413. podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
  414. if err != nil {
  415. return result, err
  416. }
  417. for _, pod := range podList.Items {
  418. result = append(result, PodNode{
  419. Pod: pod.Name,
  420. Node: pod.Spec.NodeName,
  421. })
  422. }
  423. return result, nil
  424. }