cloud_cidr_allocator.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ipam

import (
	"fmt"
	"math/rand"
	"net"
	"sync"
	"time"

	"k8s.io/klog"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	informers "k8s.io/client-go/informers/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/kubernetes/pkg/controller"
	nodeutil "k8s.io/kubernetes/pkg/controller/util/node"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	utilnode "k8s.io/kubernetes/pkg/util/node"
	utiltaints "k8s.io/kubernetes/pkg/util/taints"
	"k8s.io/legacy-cloud-providers/gce"
)

// nodeProcessingInfo tracks information related to current nodes in processing
type nodeProcessingInfo struct {
	retries int
}

// cloudCIDRAllocator allocates node CIDRs according to IP address aliases
// assigned by the cloud provider. In this case, the allocation and
// deallocation is delegated to the external provider, and the controller
// merely takes the assignment and updates the node spec.
type cloudCIDRAllocator struct {
	client clientset.Interface
	cloud  *gce.Cloud

	// nodeLister is able to list/get nodes and is populated by the shared informer passed to
	// NewCloudCIDRAllocator.
	nodeLister corelisters.NodeLister
	// nodesSynced returns true if the node shared informer has been synced at least once.
	nodesSynced cache.InformerSynced

	// Channel that is used to pass updating Nodes to the background.
	// This increases the throughput of CIDR assignment by parallelization
	// and not blocking on long operations (which shouldn't be done from
	// event handlers anyway).
	nodeUpdateChannel chan string
	recorder          record.EventRecorder
	// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation
	lock              sync.Mutex
	nodesInProcessing map[string]*nodeProcessingInfo
}

var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)

// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
	if client == nil {
		klog.Fatalf("kubeClient is nil when starting NodeController")
	}

	eventBroadcaster := record.NewBroadcaster()
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cidrAllocator"})
	eventBroadcaster.StartLogging(klog.Infof)
	klog.V(0).Infof("Sending events to api server.")
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")})

	gceCloud, ok := cloud.(*gce.Cloud)
	if !ok {
		err := fmt.Errorf("cloudCIDRAllocator does not support %v provider", cloud.ProviderName())
		return nil, err
	}

	ca := &cloudCIDRAllocator{
		client:            client,
		cloud:             gceCloud,
		nodeLister:        nodeInformer.Lister(),
		nodesSynced:       nodeInformer.Informer().HasSynced,
		nodeUpdateChannel: make(chan string, cidrUpdateQueueSize),
		recorder:          recorder,
		nodesInProcessing: map[string]*nodeProcessingInfo{},
	}

	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: nodeutil.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
		UpdateFunc: nodeutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
			if newNode.Spec.PodCIDR == "" {
				return ca.AllocateOrOccupyCIDR(newNode)
			}
			// Even if PodCIDR is assigned, but NetworkUnavailable condition is
			// set to true, we need to process the node to set the condition.
			networkUnavailableTaint := &v1.Taint{Key: schedulerapi.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
			_, cond := nodeutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
			if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
				return ca.AllocateOrOccupyCIDR(newNode)
			}
			return nil
		}),
		DeleteFunc: nodeutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
	})

	klog.V(0).Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
	return ca, nil
}
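
// Run starts the allocator: it waits for the node informer cache to sync,
// then launches cidrUpdateWorkers worker goroutines and blocks until stopCh
// is closed.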
func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()

	klog.Infof("Starting cloud CIDR allocator")
	defer klog.Infof("Shutting down cloud CIDR allocator")

	if !controller.WaitForCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
		return
	}

	for i := 0; i < cidrUpdateWorkers; i++ {
		go ca.worker(stopCh)
	}

	<-stopCh
}
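
// worker drains node names from nodeUpdateChannel and calls
// updateCIDRAllocation for each one. Failed nodes are requeued after a
// backoff obtained from retryParams until the retry budget is exhausted;
// the worker exits when stopChan is closed.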
func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
	for {
		select {
		case workItem, ok := <-ca.nodeUpdateChannel:
			if !ok {
				klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
				return
			}
			if err := ca.updateCIDRAllocation(workItem); err == nil {
				klog.V(3).Infof("Updated CIDR for %q", workItem)
			} else {
				klog.Errorf("Error updating CIDR for %q: %v", workItem, err)
				if canRetry, timeout := ca.retryParams(workItem); canRetry {
					klog.V(2).Infof("Retrying update for %q after %v", workItem, timeout)
					time.AfterFunc(timeout, func() {
						// Requeue the failed node for update again.
						ca.nodeUpdateChannel <- workItem
					})
					continue
				}
				klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
			}
			ca.removeNodeFromProcessing(workItem)
		case <-stopChan:
			return
		}
	}
}
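
// insertNodeToProcessing marks a node as being processed. It returns false
// if the node is already tracked, so callers can skip duplicate work.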
func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
	ca.lock.Lock()
	defer ca.lock.Unlock()
	if _, found := ca.nodesInProcessing[nodeName]; found {
		return false
	}
	ca.nodesInProcessing[nodeName] = &nodeProcessingInfo{}
	return true
}
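
// retryParams increments the retry count for nodeName and reports whether
// another attempt is allowed, along with the backoff to wait before
// requeueing. It returns (false, 0) once updateMaxRetries is exceeded or if
// the node is not currently tracked.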
func (ca *cloudCIDRAllocator) retryParams(nodeName string) (bool, time.Duration) {
	ca.lock.Lock()
	defer ca.lock.Unlock()

	entry, ok := ca.nodesInProcessing[nodeName]
	if !ok {
		klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName)
		return false, 0
	}

	count := entry.retries + 1
	if count > updateMaxRetries {
		return false, 0
	}
	ca.nodesInProcessing[nodeName].retries = count

	return true, nodeUpdateRetryTimeout(count)
}
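
// nodeUpdateRetryTimeout doubles updateRetryTimeout once per retry, caps it
// at maxUpdateRetryTimeout, and returns a jittered value in roughly
// [timeout/2, 3*timeout/2) so that retries are spread out over time.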
func nodeUpdateRetryTimeout(count int) time.Duration {
	timeout := updateRetryTimeout
	for i := 0; i < count && timeout < maxUpdateRetryTimeout; i++ {
		timeout *= 2
	}

	if timeout > maxUpdateRetryTimeout {
		timeout = maxUpdateRetryTimeout
	}

	return time.Duration(timeout.Nanoseconds()/2 + rand.Int63n(timeout.Nanoseconds()))
}
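
// removeNodeFromProcessing clears the in-flight marker for nodeName so a
// future update can be queued for it again.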
func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
	ca.lock.Lock()
	defer ca.lock.Unlock()
	delete(ca.nodesInProcessing, nodeName)
}

// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
	if node == nil {
		return nil
	}
	if !ca.insertNodeToProcessing(node.Name) {
		klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
		return nil
	}

	klog.V(4).Infof("Putting node %s into the work queue", node.Name)
	ca.nodeUpdateChannel <- node.Name
	return nil
}

// updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
	node, err := ca.nodeLister.Get(nodeName)
	if err != nil {
		if errors.IsNotFound(err) {
			return nil // node no longer available, skip processing
		}
		klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", nodeName, err)
		return err
	}

	cidrs, err := ca.cloud.AliasRanges(types.NodeName(nodeName))
	if err != nil {
		nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
		return fmt.Errorf("failed to allocate cidr: %v", err)
	}
	if len(cidrs) == 0 {
		nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
		return fmt.Errorf("failed to allocate cidr: Node %v has no CIDRs", node.Name)
	}
	_, cidr, err := net.ParseCIDR(cidrs[0])
	if err != nil {
		return fmt.Errorf("failed to parse string '%s' as a CIDR: %v", cidrs[0], err)
	}
	podCIDR := cidr.String()

	if node.Spec.PodCIDR == podCIDR {
		klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, podCIDR)
		// We don't return here, in order to set the NetworkUnavailable condition later below.
	} else {
		if node.Spec.PodCIDR != "" {
			klog.Errorf("PodCIDR being reassigned! Node %v spec has %v, but cloud provider has assigned %v", node.Name, node.Spec.PodCIDR, podCIDR)
			// We fall through and set the CIDR despite this error. This
			// implements the same logic as implemented in the
			// rangeAllocator.
			//
			// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248
		}
		for i := 0; i < cidrUpdateRetries; i++ {
			if err = utilnode.PatchNodeCIDR(ca.client, types.NodeName(node.Name), podCIDR); err == nil {
				klog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
				break
			}
		}
	}
	if err != nil {
		nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
		klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, podCIDR, err)
		return err
	}

	err = utilnode.SetNodeCondition(ca.client, types.NodeName(node.Name), v1.NodeCondition{
		Type:               v1.NodeNetworkUnavailable,
		Status:             v1.ConditionFalse,
		Reason:             "RouteCreated",
		Message:            "NodeController create implicit route",
		LastTransitionTime: metav1.Now(),
	})
	if err != nil {
		klog.Errorf("Error setting route status for node %v: %v", node.Name, err)
	}
	return err
}
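
// ReleaseCIDR is effectively a no-op for the cloud allocator: the alias
// range is reclaimed by the external cloud provider, so the controller only
// logs the event.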
func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
	klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
		node.Name, node.Spec.PodCIDR)
	return nil
}