route_controller.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

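// Package route contains a controller that keeps cloud-provider routes in
// sync with the PodCIDRs assigned to the cluster's nodes.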
package route

import (
	"context"
	"fmt"
	"net"
	"sync"
	"time"

	"k8s.io/klog"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientretry "k8s.io/client-go/util/retry"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/kubernetes/pkg/controller"
	nodeutil "k8s.io/kubernetes/pkg/controller/util/node"
	"k8s.io/kubernetes/pkg/util/metrics"
	utilnode "k8s.io/kubernetes/pkg/util/node"
)

const (
	// Maximal number of concurrent CreateRoute API calls.
	// TODO: This should be per-provider.
	maxConcurrentRouteCreations int = 200
)

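// updateNetworkConditionBackoff bounds the RetryOnConflict loops used when
// creating routes and when patching a node's NodeNetworkUnavailable condition.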
var updateNetworkConditionBackoff = wait.Backoff{
	Steps:    5, // Maximum number of retries.
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

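// RouteController keeps the cloud provider's route table in sync with the
// PodCIDRs assigned to the cluster's nodes.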
type RouteController struct {
	routes           cloudprovider.Routes
	kubeClient       clientset.Interface
	clusterName      string
	clusterCIDR      *net.IPNet
	nodeLister       corelisters.NodeLister
	nodeListerSynced cache.InformerSynced
	broadcaster      record.EventBroadcaster
	recorder         record.EventRecorder
}

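// New returns a RouteController that watches nodes via nodeInformer and
// manages routes through the given cloudprovider.Routes implementation.
// clusterCIDR is required; passing nil is fatal.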
func New(routes cloudprovider.Routes, kubeClient clientset.Interface, nodeInformer coreinformers.NodeInformer, clusterName string, clusterCIDR *net.IPNet) *RouteController {
	if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("route_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter())
	}

	if clusterCIDR == nil {
		klog.Fatal("RouteController: Must specify clusterCIDR.")
	}

	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(klog.Infof)
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "route_controller"})

	rc := &RouteController{
		routes:           routes,
		kubeClient:       kubeClient,
		clusterName:      clusterName,
		clusterCIDR:      clusterCIDR,
		nodeLister:       nodeInformer.Lister(),
		nodeListerSynced: nodeInformer.Informer().HasSynced,
		broadcaster:      eventBroadcaster,
		recorder:         recorder,
	}

	return rc
}

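// Run waits for the node informer cache to sync and then reconciles node
// routes every syncPeriod until stopCh is closed.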
func (rc *RouteController) Run(stopCh <-chan struct{}, syncPeriod time.Duration) {
	defer utilruntime.HandleCrash()

	klog.Info("Starting route controller")
	defer klog.Info("Shutting down route controller")

	if !controller.WaitForCacheSync("route", stopCh, rc.nodeListerSynced) {
		return
	}

	if rc.broadcaster != nil {
		rc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: rc.kubeClient.CoreV1().Events("")})
	}

	// TODO: If we only do a full resync every 5 minutes (the default), a new
	// node may wait up to 5 minutes before we even start creating a route for
	// it. This is bad.
	// We should watch nodes and, when we observe a new node (with a CIDR?),
	// trigger reconciliation for that node.
	go wait.NonSlidingUntil(func() {
		if err := rc.reconcileNodeRoutes(); err != nil {
			klog.Errorf("Couldn't reconcile node routes: %v", err)
		}
	}, syncPeriod, stopCh)

	<-stopCh
}

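// reconcileNodeRoutes lists the existing cloud routes and the current nodes,
// then hands both to reconcile.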
func (rc *RouteController) reconcileNodeRoutes() error {
	routeList, err := rc.routes.ListRoutes(context.TODO(), rc.clusterName)
	if err != nil {
		return fmt.Errorf("error listing routes: %v", err)
	}
	nodes, err := rc.nodeLister.List(labels.Everything())
	if err != nil {
		return fmt.Errorf("error listing nodes: %v", err)
	}
	return rc.reconcile(nodes, routeList)
}

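// reconcile creates a route for every node that has a PodCIDR but no matching
// route, keeps the NodeNetworkUnavailable condition up to date, and deletes
// routes that are blackholed or no longer match a node's PodCIDR.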
func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.Route) error {
	// nodeCIDRs maps nodeName->nodeCIDR
	nodeCIDRs := make(map[types.NodeName]string)
	// routeMap maps routeTargetNode->route
	routeMap := make(map[types.NodeName]*cloudprovider.Route)
	for _, route := range routes {
		if route.TargetNode != "" {
			routeMap[route.TargetNode] = route
		}
	}

	wg := sync.WaitGroup{}
	rateLimiter := make(chan struct{}, maxConcurrentRouteCreations)

	for _, node := range nodes {
		// Skip if the node hasn't been assigned a CIDR yet.
		if node.Spec.PodCIDR == "" {
			continue
		}
		nodeName := types.NodeName(node.Name)
		// Check if we have a route for this node w/ the correct CIDR.
		r := routeMap[nodeName]
		if r == nil || r.DestinationCIDR != node.Spec.PodCIDR {
			// If not, create the route.
			route := &cloudprovider.Route{
				TargetNode:      nodeName,
				DestinationCIDR: node.Spec.PodCIDR,
			}
			nameHint := string(node.UID)
			wg.Add(1)
			go func(nodeName types.NodeName, nameHint string, route *cloudprovider.Route) {
				defer wg.Done()
				err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error {
					startTime := time.Now()
					// Ensure that we don't have more than maxConcurrentRouteCreations
					// CreateRoute calls in flight.
					rateLimiter <- struct{}{}
					klog.Infof("Creating route for node %s %s with hint %s, throttled %v", nodeName, route.DestinationCIDR, nameHint, time.Since(startTime))
					err := rc.routes.CreateRoute(context.TODO(), rc.clusterName, nameHint, route)
					<-rateLimiter

					rc.updateNetworkingCondition(nodeName, err == nil)
					if err != nil {
						msg := fmt.Sprintf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Since(startTime), err)
						if rc.recorder != nil {
							rc.recorder.Eventf(
								&v1.ObjectReference{
									Kind:      "Node",
									Name:      string(nodeName),
									UID:       types.UID(nodeName),
									Namespace: "",
								}, v1.EventTypeWarning, "FailedToCreateRoute", msg)
						}
						klog.V(4).Info(msg)
						return err
					}
					klog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Since(startTime))
					return nil
				})
				if err != nil {
					klog.Errorf("Could not create route %s %s for node %s: %v", nameHint, route.DestinationCIDR, nodeName, err)
				}
			}(nodeName, nameHint, route)
		} else {
			// Update condition only if it doesn't reflect the current state.
			_, condition := nodeutil.GetNodeCondition(&node.Status, v1.NodeNetworkUnavailable)
			if condition == nil || condition.Status != v1.ConditionFalse {
				rc.updateNetworkingCondition(types.NodeName(node.Name), true)
			}
		}
		nodeCIDRs[nodeName] = node.Spec.PodCIDR
	}

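	// Delete routes within clusterCIDR that are blackholed or that no longer
	// match a node's current PodCIDR.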
	for _, route := range routes {
		if rc.isResponsibleForRoute(route) {
			// Check if this route is a blackhole, or applies to a node we know about & has an incorrect CIDR.
			if route.Blackhole || (nodeCIDRs[route.TargetNode] != route.DestinationCIDR) {
				wg.Add(1)
				// Delete the route.
				go func(route *cloudprovider.Route, startTime time.Time) {
					defer wg.Done()
					rateLimiter <- struct{}{}
					klog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR)
					if err := rc.routes.DeleteRoute(context.TODO(), rc.clusterName, route); err != nil {
						klog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Since(startTime), err)
					} else {
						klog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Since(startTime))
					}
					<-rateLimiter
				}(route, time.Now())
			}
		}
	}

	wg.Wait()
	return nil
}

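// updateNetworkingCondition sets the node's NodeNetworkUnavailable condition
// to reflect whether a route was successfully created, retrying on conflict
// with updateNetworkConditionBackoff.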
func (rc *RouteController) updateNetworkingCondition(nodeName types.NodeName, routeCreated bool) error {
	err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error {
		var err error
		// The patch itself could also fail, even though the chance is very
		// slim, so we keep the patch inside the retry loop.
		currentTime := metav1.Now()
		if routeCreated {
			err = utilnode.SetNodeCondition(rc.kubeClient, nodeName, v1.NodeCondition{
				Type:               v1.NodeNetworkUnavailable,
				Status:             v1.ConditionFalse,
				Reason:             "RouteCreated",
				Message:            "RouteController created a route",
				LastTransitionTime: currentTime,
			})
		} else {
			err = utilnode.SetNodeCondition(rc.kubeClient, nodeName, v1.NodeCondition{
				Type:               v1.NodeNetworkUnavailable,
				Status:             v1.ConditionTrue,
				Reason:             "NoRouteCreated",
				Message:            "RouteController failed to create a route",
				LastTransitionTime: currentTime,
			})
		}
		if err != nil {
			klog.V(4).Infof("Error updating node %s, retrying: %v", nodeName, err)
		}
		return err
	})
	if err != nil {
		klog.Errorf("Error updating node %s: %v", nodeName, err)
	}
	return err
}

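// isResponsibleForRoute reports whether the route's destination CIDR lies
// entirely within the controller's clusterCIDR.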
func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool {
	_, cidr, err := net.ParseCIDR(route.DestinationCIDR)
	if err != nil {
		klog.Errorf("Ignoring route %s, unparsable CIDR: %v", route.Name, err)
		return false
	}
	// Not responsible if this route's CIDR is not within our clusterCIDR.
	lastIP := make([]byte, len(cidr.IP))
	for i := range lastIP {
		lastIP[i] = cidr.IP[i] | ^cidr.Mask[i]
	}
	if !rc.clusterCIDR.Contains(cidr.IP) || !rc.clusterCIDR.Contains(lastIP) {
		return false
	}
	return true
}