route_controller.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package route
  14. import (
  15. "context"
  16. "fmt"
  17. "net"
  18. "sync"
  19. "time"
  20. "k8s.io/klog"
  21. "k8s.io/api/core/v1"
  22. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  23. "k8s.io/apimachinery/pkg/labels"
  24. "k8s.io/apimachinery/pkg/types"
  25. utilruntime "k8s.io/apimachinery/pkg/util/runtime"
  26. "k8s.io/apimachinery/pkg/util/wait"
  27. coreinformers "k8s.io/client-go/informers/core/v1"
  28. clientset "k8s.io/client-go/kubernetes"
  29. "k8s.io/client-go/kubernetes/scheme"
  30. v1core "k8s.io/client-go/kubernetes/typed/core/v1"
  31. corelisters "k8s.io/client-go/listers/core/v1"
  32. "k8s.io/client-go/tools/cache"
  33. "k8s.io/client-go/tools/record"
  34. clientretry "k8s.io/client-go/util/retry"
  35. cloudprovider "k8s.io/cloud-provider"
  36. cloudnodeutil "k8s.io/cloud-provider/node/helpers"
  37. "k8s.io/component-base/metrics/prometheus/ratelimiter"
  38. )
  39. const (
  40. // Maximal number of concurrent CreateRoute API calls.
  41. // TODO: This should be per-provider.
  42. maxConcurrentRouteCreations int = 200
  43. )
  44. var updateNetworkConditionBackoff = wait.Backoff{
  45. Steps: 5, // Maximum number of retries.
  46. Duration: 100 * time.Millisecond,
  47. Jitter: 1.0,
  48. }
  49. type RouteController struct {
  50. routes cloudprovider.Routes
  51. kubeClient clientset.Interface
  52. clusterName string
  53. clusterCIDRs []*net.IPNet
  54. nodeLister corelisters.NodeLister
  55. nodeListerSynced cache.InformerSynced
  56. broadcaster record.EventBroadcaster
  57. recorder record.EventRecorder
  58. }
  59. func New(routes cloudprovider.Routes, kubeClient clientset.Interface, nodeInformer coreinformers.NodeInformer, clusterName string, clusterCIDRs []*net.IPNet) *RouteController {
  60. if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
  61. ratelimiter.RegisterMetricAndTrackRateLimiterUsage("route_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter())
  62. }
  63. if len(clusterCIDRs) == 0 {
  64. klog.Fatal("RouteController: Must specify clusterCIDR.")
  65. }
  66. eventBroadcaster := record.NewBroadcaster()
  67. eventBroadcaster.StartLogging(klog.Infof)
  68. recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "route_controller"})
  69. rc := &RouteController{
  70. routes: routes,
  71. kubeClient: kubeClient,
  72. clusterName: clusterName,
  73. clusterCIDRs: clusterCIDRs,
  74. nodeLister: nodeInformer.Lister(),
  75. nodeListerSynced: nodeInformer.Informer().HasSynced,
  76. broadcaster: eventBroadcaster,
  77. recorder: recorder,
  78. }
  79. return rc
  80. }
  81. func (rc *RouteController) Run(stopCh <-chan struct{}, syncPeriod time.Duration) {
  82. defer utilruntime.HandleCrash()
  83. klog.Info("Starting route controller")
  84. defer klog.Info("Shutting down route controller")
  85. if !cache.WaitForNamedCacheSync("route", stopCh, rc.nodeListerSynced) {
  86. return
  87. }
  88. if rc.broadcaster != nil {
  89. rc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: rc.kubeClient.CoreV1().Events("")})
  90. }
  91. // TODO: If we do just the full Resync every 5 minutes (default value)
  92. // that means that we may wait up to 5 minutes before even starting
  93. // creating a route for it. This is bad.
  94. // We should have a watch on node and if we observe a new node (with CIDR?)
  95. // trigger reconciliation for that node.
  96. go wait.NonSlidingUntil(func() {
  97. if err := rc.reconcileNodeRoutes(); err != nil {
  98. klog.Errorf("Couldn't reconcile node routes: %v", err)
  99. }
  100. }, syncPeriod, stopCh)
  101. <-stopCh
  102. }
  103. func (rc *RouteController) reconcileNodeRoutes() error {
  104. routeList, err := rc.routes.ListRoutes(context.TODO(), rc.clusterName)
  105. if err != nil {
  106. return fmt.Errorf("error listing routes: %v", err)
  107. }
  108. nodes, err := rc.nodeLister.List(labels.Everything())
  109. if err != nil {
  110. return fmt.Errorf("error listing nodes: %v", err)
  111. }
  112. return rc.reconcile(nodes, routeList)
  113. }
  114. func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.Route) error {
  115. var l sync.Mutex
  116. // for each node a map of podCIDRs and their created status
  117. nodeRoutesStatuses := make(map[types.NodeName]map[string]bool)
  118. // routeMap maps routeTargetNode->route
  119. routeMap := make(map[types.NodeName][]*cloudprovider.Route)
  120. for _, route := range routes {
  121. if route.TargetNode != "" {
  122. routeMap[route.TargetNode] = append(routeMap[route.TargetNode], route)
  123. }
  124. }
  125. wg := sync.WaitGroup{}
  126. rateLimiter := make(chan struct{}, maxConcurrentRouteCreations)
  127. // searches existing routes by node for a matching route
  128. for _, node := range nodes {
  129. // Skip if the node hasn't been assigned a CIDR yet.
  130. if len(node.Spec.PodCIDRs) == 0 {
  131. continue
  132. }
  133. nodeName := types.NodeName(node.Name)
  134. l.Lock()
  135. nodeRoutesStatuses[nodeName] = make(map[string]bool)
  136. l.Unlock()
  137. // for every node, for every cidr
  138. for _, podCIDR := range node.Spec.PodCIDRs {
  139. // we add it to our nodeCIDRs map here because add and delete go routines run at the same time
  140. l.Lock()
  141. nodeRoutesStatuses[nodeName][podCIDR] = false
  142. l.Unlock()
  143. // ignore if already created
  144. if hasRoute(routeMap, nodeName, podCIDR) {
  145. l.Lock()
  146. nodeRoutesStatuses[nodeName][podCIDR] = true // a route for this podCIDR is already created
  147. l.Unlock()
  148. continue
  149. }
  150. // if we are here, then a route needs to be created for this node
  151. route := &cloudprovider.Route{
  152. TargetNode: nodeName,
  153. DestinationCIDR: podCIDR,
  154. }
  155. // cloud providers that:
  156. // - depend on nameHint
  157. // - trying to support dual stack
  158. // will have to carefully generate new route names that allow node->(multi cidr)
  159. nameHint := string(node.UID)
  160. wg.Add(1)
  161. go func(nodeName types.NodeName, nameHint string, route *cloudprovider.Route) {
  162. defer wg.Done()
  163. err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error {
  164. startTime := time.Now()
  165. // Ensure that we don't have more than maxConcurrentRouteCreations
  166. // CreateRoute calls in flight.
  167. rateLimiter <- struct{}{}
  168. klog.Infof("Creating route for node %s %s with hint %s, throttled %v", nodeName, route.DestinationCIDR, nameHint, time.Since(startTime))
  169. err := rc.routes.CreateRoute(context.TODO(), rc.clusterName, nameHint, route)
  170. <-rateLimiter
  171. if err != nil {
  172. msg := fmt.Sprintf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Since(startTime), err)
  173. if rc.recorder != nil {
  174. rc.recorder.Eventf(
  175. &v1.ObjectReference{
  176. Kind: "Node",
  177. Name: string(nodeName),
  178. UID: types.UID(nodeName),
  179. Namespace: "",
  180. }, v1.EventTypeWarning, "FailedToCreateRoute", msg)
  181. klog.V(4).Infof(msg)
  182. return err
  183. }
  184. }
  185. l.Lock()
  186. nodeRoutesStatuses[nodeName][route.DestinationCIDR] = true
  187. l.Unlock()
  188. klog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Since(startTime))
  189. return nil
  190. })
  191. if err != nil {
  192. klog.Errorf("Could not create route %s %s for node %s: %v", nameHint, route.DestinationCIDR, nodeName, err)
  193. }
  194. }(nodeName, nameHint, route)
  195. }
  196. }
  197. // searches our bag of node->cidrs for a match
  198. nodeHasCidr := func(nodeName types.NodeName, cidr string) bool {
  199. l.Lock()
  200. defer l.Unlock()
  201. nodeRoutes := nodeRoutesStatuses[nodeName]
  202. if nodeRoutes == nil {
  203. return false
  204. }
  205. _, exist := nodeRoutes[cidr]
  206. return exist
  207. }
  208. // delete routes that are not in use
  209. for _, route := range routes {
  210. if rc.isResponsibleForRoute(route) {
  211. // Check if this route is a blackhole, or applies to a node we know about & has an incorrect CIDR.
  212. if route.Blackhole || !nodeHasCidr(route.TargetNode, route.DestinationCIDR) {
  213. wg.Add(1)
  214. // Delete the route.
  215. go func(route *cloudprovider.Route, startTime time.Time) {
  216. defer wg.Done()
  217. // respect the rate limiter
  218. rateLimiter <- struct{}{}
  219. klog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
  220. if err := rc.routes.DeleteRoute(context.TODO(), rc.clusterName, route); err != nil {
  221. klog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Since(startTime), err)
  222. } else {
  223. klog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Since(startTime))
  224. }
  225. <-rateLimiter
  226. }(route, time.Now())
  227. }
  228. }
  229. }
  230. wg.Wait()
  231. // after all routes have been created (or not), we start updating
  232. // all nodes' statuses with the outcome
  233. for _, node := range nodes {
  234. wg.Add(1)
  235. nodeRoutes := nodeRoutesStatuses[types.NodeName(node.Name)]
  236. allRoutesCreated := true
  237. if len(nodeRoutes) == 0 {
  238. go func(n *v1.Node) {
  239. defer wg.Done()
  240. klog.Infof("node %v has no routes assigned to it. NodeNetworkUnavailable will be set to true", n.Name)
  241. rc.updateNetworkingCondition(n, false)
  242. }(node)
  243. continue
  244. }
  245. // check if all routes were created. if so, then it should be ready
  246. for _, created := range nodeRoutes {
  247. if !created {
  248. allRoutesCreated = false
  249. break
  250. }
  251. }
  252. go func(n *v1.Node) {
  253. defer wg.Done()
  254. rc.updateNetworkingCondition(n, allRoutesCreated)
  255. }(node)
  256. }
  257. wg.Wait()
  258. return nil
  259. }
  260. func (rc *RouteController) updateNetworkingCondition(node *v1.Node, routesCreated bool) error {
  261. _, condition := cloudnodeutil.GetNodeCondition(&(node.Status), v1.NodeNetworkUnavailable)
  262. if routesCreated && condition != nil && condition.Status == v1.ConditionFalse {
  263. klog.V(2).Infof("set node %v with NodeNetworkUnavailable=false was canceled because it is already set", node.Name)
  264. return nil
  265. }
  266. if !routesCreated && condition != nil && condition.Status == v1.ConditionTrue {
  267. klog.V(2).Infof("set node %v with NodeNetworkUnavailable=true was canceled because it is already set", node.Name)
  268. return nil
  269. }
  270. klog.Infof("Patching node status %v with %v previous condition was:%+v", node.Name, routesCreated, condition)
  271. // either condition is not there, or has a value != to what we need
  272. // start setting it
  273. err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error {
  274. var err error
  275. // Patch could also fail, even though the chance is very slim. So we still do
  276. // patch in the retry loop.
  277. currentTime := metav1.Now()
  278. if routesCreated {
  279. err = cloudnodeutil.SetNodeCondition(rc.kubeClient, types.NodeName(node.Name), v1.NodeCondition{
  280. Type: v1.NodeNetworkUnavailable,
  281. Status: v1.ConditionFalse,
  282. Reason: "RouteCreated",
  283. Message: "RouteController created a route",
  284. LastTransitionTime: currentTime,
  285. })
  286. } else {
  287. err = cloudnodeutil.SetNodeCondition(rc.kubeClient, types.NodeName(node.Name), v1.NodeCondition{
  288. Type: v1.NodeNetworkUnavailable,
  289. Status: v1.ConditionTrue,
  290. Reason: "NoRouteCreated",
  291. Message: "RouteController failed to create a route",
  292. LastTransitionTime: currentTime,
  293. })
  294. }
  295. if err != nil {
  296. klog.V(4).Infof("Error updating node %s, retrying: %v", types.NodeName(node.Name), err)
  297. }
  298. return err
  299. })
  300. if err != nil {
  301. klog.Errorf("Error updating node %s: %v", node.Name, err)
  302. }
  303. return err
  304. }
  305. func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool {
  306. _, cidr, err := net.ParseCIDR(route.DestinationCIDR)
  307. if err != nil {
  308. klog.Errorf("Ignoring route %s, unparsable CIDR: %v", route.Name, err)
  309. return false
  310. }
  311. // Not responsible if this route's CIDR is not within our clusterCIDR
  312. lastIP := make([]byte, len(cidr.IP))
  313. for i := range lastIP {
  314. lastIP[i] = cidr.IP[i] | ^cidr.Mask[i]
  315. }
  316. // check across all cluster cidrs
  317. for _, clusterCIDR := range rc.clusterCIDRs {
  318. if clusterCIDR.Contains(cidr.IP) || clusterCIDR.Contains(lastIP) {
  319. return true
  320. }
  321. }
  322. return false
  323. }
  324. // checks if a node owns a route with a specific cidr
  325. func hasRoute(rm map[types.NodeName][]*cloudprovider.Route, nodeName types.NodeName, cidr string) bool {
  326. if routes, ok := rm[nodeName]; ok {
  327. for _, route := range routes {
  328. if route.DestinationCIDR == cidr {
  329. return true
  330. }
  331. }
  332. }
  333. return false
  334. }