resource.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. /*
  2. Copyright 2019 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package node
  14. import (
  15. "context"
  16. "fmt"
  17. "net"
  18. "strings"
  19. "time"
  20. v1 "k8s.io/api/core/v1"
  21. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  22. "k8s.io/apimachinery/pkg/util/rand"
  23. "k8s.io/apimachinery/pkg/util/sets"
  24. clientset "k8s.io/client-go/kubernetes"
  25. v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
  26. nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
  27. schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
  28. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  29. "k8s.io/kubernetes/test/e2e/system"
  30. )
const (
	// poll is how often to poll pods, nodes and claims.
	poll = 2 * time.Second

	// singleCallTimeout is how long to keep trying a single API call (like
	// 'get' or 'list'). It is used to prevent transient failures from failing
	// tests.
	// TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
	singleCallTimeout = 5 * time.Minute

	// sshPort is the SSH port that GetExternalIP and GetInternalIP join onto
	// the node address they return.
	sshPort = "22"
)
// PodNode is a pod-node pair indicating which node a given pod is running on.
type PodNode struct {
	// Pod is the name of the pod.
	Pod string
	// Node is the name of the node.
	Node string
}
  48. // FirstAddress returns the first address of the given type of each node.
  49. func FirstAddress(nodelist *v1.NodeList, addrType v1.NodeAddressType) string {
  50. for _, n := range nodelist.Items {
  51. for _, addr := range n.Status.Addresses {
  52. if addr.Type == addrType && addr.Address != "" {
  53. return addr.Address
  54. }
  55. }
  56. }
  57. return ""
  58. }
  59. // TODO: better to change to a easy read name
  60. func isNodeConditionSetAsExpected(node *v1.Node, conditionType v1.NodeConditionType, wantTrue, silent bool) bool {
  61. // Check the node readiness condition (logging all).
  62. for _, cond := range node.Status.Conditions {
  63. // Ensure that the condition type and the status matches as desired.
  64. if cond.Type == conditionType {
  65. // For NodeReady condition we need to check Taints as well
  66. if cond.Type == v1.NodeReady {
  67. hasNodeControllerTaints := false
  68. // For NodeReady we need to check if Taints are gone as well
  69. taints := node.Spec.Taints
  70. for _, taint := range taints {
  71. if taint.MatchTaint(nodectlr.UnreachableTaintTemplate) || taint.MatchTaint(nodectlr.NotReadyTaintTemplate) {
  72. hasNodeControllerTaints = true
  73. break
  74. }
  75. }
  76. if wantTrue {
  77. if (cond.Status == v1.ConditionTrue) && !hasNodeControllerTaints {
  78. return true
  79. }
  80. msg := ""
  81. if !hasNodeControllerTaints {
  82. msg = fmt.Sprintf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  83. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  84. } else {
  85. msg = fmt.Sprintf("Condition %s of node %s is %v, but Node is tainted by NodeController with %v. Failure",
  86. conditionType, node.Name, cond.Status == v1.ConditionTrue, taints)
  87. }
  88. if !silent {
  89. e2elog.Logf(msg)
  90. }
  91. return false
  92. }
  93. // TODO: check if the Node is tainted once we enable NC notReady/unreachable taints by default
  94. if cond.Status != v1.ConditionTrue {
  95. return true
  96. }
  97. if !silent {
  98. e2elog.Logf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  99. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  100. }
  101. return false
  102. }
  103. if (wantTrue && (cond.Status == v1.ConditionTrue)) || (!wantTrue && (cond.Status != v1.ConditionTrue)) {
  104. return true
  105. }
  106. if !silent {
  107. e2elog.Logf("Condition %s of node %s is %v instead of %t. Reason: %v, message: %v",
  108. conditionType, node.Name, cond.Status == v1.ConditionTrue, wantTrue, cond.Reason, cond.Message)
  109. }
  110. return false
  111. }
  112. }
  113. if !silent {
  114. e2elog.Logf("Couldn't find condition %v on node %v", conditionType, node.Name)
  115. }
  116. return false
  117. }
  118. // IsConditionSetAsExpected returns a wantTrue value if the node has a match to the conditionType, otherwise returns an opposite value of the wantTrue with detailed logging.
  119. func IsConditionSetAsExpected(node *v1.Node, conditionType v1.NodeConditionType, wantTrue bool) bool {
  120. return isNodeConditionSetAsExpected(node, conditionType, wantTrue, false)
  121. }
  122. // IsConditionSetAsExpectedSilent returns a wantTrue value if the node has a match to the conditionType, otherwise returns an opposite value of the wantTrue.
  123. func IsConditionSetAsExpectedSilent(node *v1.Node, conditionType v1.NodeConditionType, wantTrue bool) bool {
  124. return isNodeConditionSetAsExpected(node, conditionType, wantTrue, true)
  125. }
  126. // IsConditionUnset returns true if conditions of the given node do not have a match to the given conditionType, otherwise false.
  127. func IsConditionUnset(node *v1.Node, conditionType v1.NodeConditionType) bool {
  128. for _, cond := range node.Status.Conditions {
  129. if cond.Type == conditionType {
  130. return false
  131. }
  132. }
  133. return true
  134. }
  135. // Filter filters nodes in NodeList in place, removing nodes that do not
  136. // satisfy the given condition
  137. func Filter(nodeList *v1.NodeList, fn func(node v1.Node) bool) {
  138. var l []v1.Node
  139. for _, node := range nodeList.Items {
  140. if fn(node) {
  141. l = append(l, node)
  142. }
  143. }
  144. nodeList.Items = l
  145. }
  146. // TotalRegistered returns number of registered Nodes excluding Master Node.
  147. func TotalRegistered(c clientset.Interface) (int, error) {
  148. nodes, err := waitListSchedulableNodes(c)
  149. if err != nil {
  150. e2elog.Logf("Failed to list nodes: %v", err)
  151. return 0, err
  152. }
  153. return len(nodes.Items), nil
  154. }
  155. // TotalReady returns number of ready Nodes excluding Master Node.
  156. func TotalReady(c clientset.Interface) (int, error) {
  157. nodes, err := waitListSchedulableNodes(c)
  158. if err != nil {
  159. e2elog.Logf("Failed to list nodes: %v", err)
  160. return 0, err
  161. }
  162. // Filter out not-ready nodes.
  163. Filter(nodes, func(node v1.Node) bool {
  164. return IsConditionSetAsExpected(&node, v1.NodeReady, true)
  165. })
  166. return len(nodes.Items), nil
  167. }
  168. // GetExternalIP returns node external IP concatenated with port 22 for ssh
  169. // e.g. 1.2.3.4:22
  170. func GetExternalIP(node *v1.Node) (string, error) {
  171. e2elog.Logf("Getting external IP address for %s", node.Name)
  172. host := ""
  173. for _, a := range node.Status.Addresses {
  174. if a.Type == v1.NodeExternalIP && a.Address != "" {
  175. host = net.JoinHostPort(a.Address, sshPort)
  176. break
  177. }
  178. }
  179. if host == "" {
  180. return "", fmt.Errorf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
  181. }
  182. return host, nil
  183. }
  184. // GetInternalIP returns node internal IP
  185. func GetInternalIP(node *v1.Node) (string, error) {
  186. host := ""
  187. for _, address := range node.Status.Addresses {
  188. if address.Type == v1.NodeInternalIP {
  189. if address.Address != "" {
  190. host = net.JoinHostPort(address.Address, sshPort)
  191. break
  192. }
  193. }
  194. }
  195. if host == "" {
  196. return "", fmt.Errorf("Couldn't get the internal IP of host %s with addresses %v", node.Name, node.Status.Addresses)
  197. }
  198. return host, nil
  199. }
  200. // GetAddresses returns a list of addresses of the given addressType for the given node
  201. func GetAddresses(node *v1.Node, addressType v1.NodeAddressType) (ips []string) {
  202. for j := range node.Status.Addresses {
  203. nodeAddress := &node.Status.Addresses[j]
  204. if nodeAddress.Type == addressType && nodeAddress.Address != "" {
  205. ips = append(ips, nodeAddress.Address)
  206. }
  207. }
  208. return
  209. }
  210. // CollectAddresses returns a list of addresses of the given addressType for the given list of nodes
  211. func CollectAddresses(nodes *v1.NodeList, addressType v1.NodeAddressType) []string {
  212. ips := []string{}
  213. for i := range nodes.Items {
  214. ips = append(ips, GetAddresses(&nodes.Items[i], addressType)...)
  215. }
  216. return ips
  217. }
  218. // PickIP picks one public node IP
  219. func PickIP(c clientset.Interface) (string, error) {
  220. publicIps, err := GetPublicIps(c)
  221. if err != nil {
  222. return "", fmt.Errorf("get node public IPs error: %s", err)
  223. }
  224. if len(publicIps) == 0 {
  225. return "", fmt.Errorf("got unexpected number (%d) of public IPs", len(publicIps))
  226. }
  227. ip := publicIps[0]
  228. return ip, nil
  229. }
  230. // GetPublicIps returns a public IP list of nodes.
  231. func GetPublicIps(c clientset.Interface) ([]string, error) {
  232. nodes, err := GetReadySchedulableNodes(c)
  233. if err != nil {
  234. return nil, fmt.Errorf("get schedulable and ready nodes error: %s", err)
  235. }
  236. ips := CollectAddresses(nodes, v1.NodeExternalIP)
  237. if len(ips) == 0 {
  238. // If ExternalIP isn't set, assume the test programs can reach the InternalIP
  239. ips = CollectAddresses(nodes, v1.NodeInternalIP)
  240. }
  241. return ips, nil
  242. }
  243. // GetReadySchedulableNodes addresses the common use case of getting nodes you can do work on.
  244. // 1) Needs to be schedulable.
  245. // 2) Needs to be ready.
  246. // If EITHER 1 or 2 is not true, most tests will want to ignore the node entirely.
  247. // If there are no nodes that are both ready and schedulable, this will return an error.
  248. func GetReadySchedulableNodes(c clientset.Interface) (nodes *v1.NodeList, err error) {
  249. nodes, err = checkWaitListSchedulableNodes(c)
  250. if err != nil {
  251. return nil, fmt.Errorf("listing schedulable nodes error: %s", err)
  252. }
  253. Filter(nodes, func(node v1.Node) bool {
  254. return IsNodeSchedulable(&node) && IsNodeUntainted(&node)
  255. })
  256. if len(nodes.Items) == 0 {
  257. return nil, fmt.Errorf("there are currently no ready, schedulable nodes in the cluster")
  258. }
  259. return nodes, nil
  260. }
  261. // GetBoundedReadySchedulableNodes is like GetReadySchedulableNodes except that it returns
  262. // at most maxNodes nodes. Use this to keep your test case from blowing up when run on a
  263. // large cluster.
  264. func GetBoundedReadySchedulableNodes(c clientset.Interface, maxNodes int) (nodes *v1.NodeList, err error) {
  265. nodes, err = GetReadySchedulableNodes(c)
  266. if err != nil {
  267. return nil, err
  268. }
  269. if len(nodes.Items) > maxNodes {
  270. shuffled := make([]v1.Node, maxNodes)
  271. perm := rand.Perm(len(nodes.Items))
  272. for i, j := range perm {
  273. if j < len(shuffled) {
  274. shuffled[j] = nodes.Items[i]
  275. }
  276. }
  277. nodes.Items = shuffled
  278. }
  279. return nodes, nil
  280. }
  281. // GetRandomReadySchedulableNode gets a single randomly-selected node which is available for
  282. // running pods on. If there are no available nodes it will return an error.
  283. func GetRandomReadySchedulableNode(c clientset.Interface) (*v1.Node, error) {
  284. nodes, err := GetReadySchedulableNodes(c)
  285. if err != nil {
  286. return nil, err
  287. }
  288. return &nodes.Items[rand.Intn(len(nodes.Items))], nil
  289. }
  290. // GetReadyNodesIncludingTainted returns all ready nodes, even those which are tainted.
  291. // There are cases when we care about tainted nodes
  292. // E.g. in tests related to nodes with gpu we care about nodes despite
  293. // presence of nvidia.com/gpu=present:NoSchedule taint
  294. func GetReadyNodesIncludingTainted(c clientset.Interface) (nodes *v1.NodeList, err error) {
  295. nodes, err = checkWaitListSchedulableNodes(c)
  296. if err != nil {
  297. return nil, fmt.Errorf("listing schedulable nodes error: %s", err)
  298. }
  299. Filter(nodes, func(node v1.Node) bool {
  300. return IsNodeSchedulable(&node)
  301. })
  302. return nodes, nil
  303. }
  304. // GetMasterAndWorkerNodes will return a list masters and schedulable worker nodes
  305. func GetMasterAndWorkerNodes(c clientset.Interface) (sets.String, *v1.NodeList, error) {
  306. nodes := &v1.NodeList{}
  307. masters := sets.NewString()
  308. all, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
  309. if err != nil {
  310. return nil, nil, fmt.Errorf("get nodes error: %s", err)
  311. }
  312. for _, n := range all.Items {
  313. if system.DeprecatedMightBeMasterNode(n.Name) {
  314. masters.Insert(n.Name)
  315. } else if IsNodeSchedulable(&n) && IsNodeUntainted(&n) {
  316. nodes.Items = append(nodes.Items, n)
  317. }
  318. }
  319. return masters, nodes, nil
  320. }
  321. // IsNodeUntainted tests whether a fake pod can be scheduled on "node", given its current taints.
  322. // TODO: need to discuss wether to return bool and error type
  323. func IsNodeUntainted(node *v1.Node) bool {
  324. return isNodeUntaintedWithNonblocking(node, "")
  325. }
  326. // isNodeUntaintedWithNonblocking tests whether a fake pod can be scheduled on "node"
  327. // but allows for taints in the list of non-blocking taints.
  328. func isNodeUntaintedWithNonblocking(node *v1.Node, nonblockingTaints string) bool {
  329. fakePod := &v1.Pod{
  330. TypeMeta: metav1.TypeMeta{
  331. Kind: "Pod",
  332. APIVersion: "v1",
  333. },
  334. ObjectMeta: metav1.ObjectMeta{
  335. Name: "fake-not-scheduled",
  336. Namespace: "fake-not-scheduled",
  337. },
  338. Spec: v1.PodSpec{
  339. Containers: []v1.Container{
  340. {
  341. Name: "fake-not-scheduled",
  342. Image: "fake-not-scheduled",
  343. },
  344. },
  345. },
  346. }
  347. nodeInfo := schedulernodeinfo.NewNodeInfo()
  348. // Simple lookup for nonblocking taints based on comma-delimited list.
  349. nonblockingTaintsMap := map[string]struct{}{}
  350. for _, t := range strings.Split(nonblockingTaints, ",") {
  351. if strings.TrimSpace(t) != "" {
  352. nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{}
  353. }
  354. }
  355. if len(nonblockingTaintsMap) > 0 {
  356. nodeCopy := node.DeepCopy()
  357. nodeCopy.Spec.Taints = []v1.Taint{}
  358. for _, v := range node.Spec.Taints {
  359. if _, isNonblockingTaint := nonblockingTaintsMap[v.Key]; !isNonblockingTaint {
  360. nodeCopy.Spec.Taints = append(nodeCopy.Spec.Taints, v)
  361. }
  362. }
  363. nodeInfo.SetNode(nodeCopy)
  364. } else {
  365. nodeInfo.SetNode(node)
  366. }
  367. taints, err := nodeInfo.Taints()
  368. if err != nil {
  369. e2elog.Failf("Can't test predicates for node %s: %v", node.Name, err)
  370. return false
  371. }
  372. return v1helper.TolerationsTolerateTaintsWithFilter(fakePod.Spec.Tolerations, taints, func(t *v1.Taint) bool {
  373. return t.Effect == v1.TaintEffectNoExecute || t.Effect == v1.TaintEffectNoSchedule
  374. })
  375. }
  376. // IsNodeSchedulable returns true if:
  377. // 1) doesn't have "unschedulable" field set
  378. // 2) it also returns true from IsNodeReady
  379. func IsNodeSchedulable(node *v1.Node) bool {
  380. if node == nil {
  381. return false
  382. }
  383. return !node.Spec.Unschedulable && IsNodeReady(node)
  384. }
  385. // IsNodeReady returns true if:
  386. // 1) it's Ready condition is set to true
  387. // 2) doesn't have NetworkUnavailable condition set to true
  388. func IsNodeReady(node *v1.Node) bool {
  389. nodeReady := IsConditionSetAsExpected(node, v1.NodeReady, true)
  390. networkReady := IsConditionUnset(node, v1.NodeNetworkUnavailable) ||
  391. IsConditionSetAsExpectedSilent(node, v1.NodeNetworkUnavailable, false)
  392. return nodeReady && networkReady
  393. }
  394. // hasNonblockingTaint returns true if the node contains at least
  395. // one taint with a key matching the regexp.
  396. func hasNonblockingTaint(node *v1.Node, nonblockingTaints string) bool {
  397. if node == nil {
  398. return false
  399. }
  400. // Simple lookup for nonblocking taints based on comma-delimited list.
  401. nonblockingTaintsMap := map[string]struct{}{}
  402. for _, t := range strings.Split(nonblockingTaints, ",") {
  403. if strings.TrimSpace(t) != "" {
  404. nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{}
  405. }
  406. }
  407. for _, taint := range node.Spec.Taints {
  408. if _, hasNonblockingTaint := nonblockingTaintsMap[taint.Key]; hasNonblockingTaint {
  409. return true
  410. }
  411. }
  412. return false
  413. }
  414. // PodNodePairs return podNode pairs for all pods in a namespace
  415. func PodNodePairs(c clientset.Interface, ns string) ([]PodNode, error) {
  416. var result []PodNode
  417. podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
  418. if err != nil {
  419. return result, err
  420. }
  421. for _, pod := range podList.Items {
  422. result = append(result, PodNode{
  423. Pod: pod.Name,
  424. Node: pod.Spec.NodeName,
  425. })
  426. }
  427. return result, nil
  428. }