graph.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package node
  14. import (
  15. "sync"
  16. corev1 "k8s.io/api/core/v1"
  17. pvutil "k8s.io/kubernetes/pkg/api/v1/persistentvolume"
  18. podutil "k8s.io/kubernetes/pkg/api/v1/pod"
  19. "k8s.io/kubernetes/third_party/forked/gonum/graph"
  20. "k8s.io/kubernetes/third_party/forked/gonum/graph/simple"
  21. )
  22. // namedVertex implements graph.Node and remembers the type, namespace, and name of its related API object
  23. type namedVertex struct {
  24. name string
  25. namespace string
  26. id int
  27. vertexType vertexType
  28. }
  29. func newNamedVertex(vertexType vertexType, namespace, name string, id int) *namedVertex {
  30. return &namedVertex{
  31. vertexType: vertexType,
  32. name: name,
  33. namespace: namespace,
  34. id: id,
  35. }
  36. }
  37. func (n *namedVertex) ID() int {
  38. return n.id
  39. }
  40. func (n *namedVertex) String() string {
  41. if len(n.namespace) == 0 {
  42. return vertexTypes[n.vertexType] + ":" + n.name
  43. }
  44. return vertexTypes[n.vertexType] + ":" + n.namespace + "/" + n.name
  45. }
  46. // destinationEdge is a graph edge that includes a denormalized reference to the final destination vertex.
  47. // This should only be used when there is a single leaf vertex reachable from T.
  48. type destinationEdge struct {
  49. F graph.Node
  50. T graph.Node
  51. Destination graph.Node
  52. }
  53. func newDestinationEdge(from, to, destination graph.Node) graph.Edge {
  54. return &destinationEdge{F: from, T: to, Destination: destination}
  55. }
  56. func (e *destinationEdge) From() graph.Node { return e.F }
  57. func (e *destinationEdge) To() graph.Node { return e.T }
  58. func (e *destinationEdge) Weight() float64 { return 0 }
  59. func (e *destinationEdge) DestinationID() int { return e.Destination.ID() }
  60. // Graph holds graph vertices and a way to look up a vertex for a particular API type/namespace/name.
  61. // All edges point toward the vertices representing Kubernetes nodes:
  62. //
  63. // node <- pod
  64. // pod <- secret,configmap,pvc
  65. // pvc <- pv
  66. // pv <- secret
  67. type Graph struct {
  68. lock sync.RWMutex
  69. graph *simple.DirectedAcyclicGraph
  70. // vertices is a map of type -> namespace -> name -> vertex
  71. vertices map[vertexType]namespaceVertexMapping
  72. // destinationEdgeIndex is a map of vertex -> set of destination IDs
  73. destinationEdgeIndex map[int]*intSet
  74. // destinationEdgeThreshold is the minimum number of distinct destination IDs at which to maintain an index
  75. destinationEdgeThreshold int
  76. }
  77. // namespaceVertexMapping is a map of namespace -> name -> vertex
  78. type namespaceVertexMapping map[string]nameVertexMapping
  79. // nameVertexMapping is a map of name -> vertex
  80. type nameVertexMapping map[string]*namedVertex
  81. func NewGraph() *Graph {
  82. return &Graph{
  83. vertices: map[vertexType]namespaceVertexMapping{},
  84. graph: simple.NewDirectedAcyclicGraph(0, 0),
  85. destinationEdgeIndex: map[int]*intSet{},
  86. // experimentally determined to be the point at which iteration adds an order of magnitude to the authz check.
  87. // since maintaining indexes costs time/memory while processing graph changes, we don't want to make this too low.
  88. destinationEdgeThreshold: 200,
  89. }
  90. }
  91. // vertexType indicates the type of the API object the vertex represents.
  92. // represented as a byte to minimize space used in the vertices.
  93. type vertexType byte
  94. const (
  95. configMapVertexType vertexType = iota
  96. nodeVertexType
  97. podVertexType
  98. pvcVertexType
  99. pvVertexType
  100. secretVertexType
  101. vaVertexType
  102. serviceAccountVertexType
  103. )
  104. var vertexTypes = map[vertexType]string{
  105. configMapVertexType: "configmap",
  106. nodeVertexType: "node",
  107. podVertexType: "pod",
  108. pvcVertexType: "pvc",
  109. pvVertexType: "pv",
  110. secretVertexType: "secret",
  111. vaVertexType: "volumeattachment",
  112. serviceAccountVertexType: "serviceAccount",
  113. }
  114. // must be called under a write lock
  115. func (g *Graph) getOrCreateVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
  116. if vertex, exists := g.getVertex_rlocked(vertexType, namespace, name); exists {
  117. return vertex
  118. }
  119. return g.createVertex_locked(vertexType, namespace, name)
  120. }
  121. // must be called under a read lock
  122. func (g *Graph) getVertex_rlocked(vertexType vertexType, namespace, name string) (*namedVertex, bool) {
  123. vertex, exists := g.vertices[vertexType][namespace][name]
  124. return vertex, exists
  125. }
  126. // must be called under a write lock
  127. func (g *Graph) createVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
  128. typedVertices, exists := g.vertices[vertexType]
  129. if !exists {
  130. typedVertices = namespaceVertexMapping{}
  131. g.vertices[vertexType] = typedVertices
  132. }
  133. namespacedVertices, exists := typedVertices[namespace]
  134. if !exists {
  135. namespacedVertices = map[string]*namedVertex{}
  136. typedVertices[namespace] = namespacedVertices
  137. }
  138. vertex := newNamedVertex(vertexType, namespace, name, g.graph.NewNodeID())
  139. namespacedVertices[name] = vertex
  140. g.graph.AddNode(vertex)
  141. return vertex
  142. }
  143. // must be called under write lock
  144. func (g *Graph) deleteVertex_locked(vertexType vertexType, namespace, name string) {
  145. vertex, exists := g.getVertex_rlocked(vertexType, namespace, name)
  146. if !exists {
  147. return
  148. }
  149. // find existing neighbors with a single edge (meaning we are their only neighbor)
  150. neighborsToRemove := []graph.Node{}
  151. edgesToRemoveFromIndexes := []graph.Edge{}
  152. g.graph.VisitFrom(vertex, func(neighbor graph.Node) bool {
  153. // this downstream neighbor has only one edge (which must be from us), so remove them as well
  154. if g.graph.Degree(neighbor) == 1 {
  155. neighborsToRemove = append(neighborsToRemove, neighbor)
  156. }
  157. return true
  158. })
  159. g.graph.VisitTo(vertex, func(neighbor graph.Node) bool {
  160. if g.graph.Degree(neighbor) == 1 {
  161. // this upstream neighbor has only one edge (which must be to us), so remove them as well
  162. neighborsToRemove = append(neighborsToRemove, neighbor)
  163. } else {
  164. // decrement the destination edge index on this neighbor if the edge between us was a destination edge
  165. edgesToRemoveFromIndexes = append(edgesToRemoveFromIndexes, g.graph.EdgeBetween(vertex, neighbor))
  166. }
  167. return true
  168. })
  169. // remove the vertex
  170. g.removeVertex_locked(vertex)
  171. // remove neighbors that are now edgeless
  172. for _, neighbor := range neighborsToRemove {
  173. g.removeVertex_locked(neighbor.(*namedVertex))
  174. }
  175. // remove edges from destination indexes for neighbors that dropped outbound edges
  176. for _, edge := range edgesToRemoveFromIndexes {
  177. g.removeEdgeFromDestinationIndex_locked(edge)
  178. }
  179. }
  180. // must be called under write lock
  181. // deletes edges from a given vertex type to a specific vertex
  182. // will delete each orphaned "from" vertex, but will never delete the "to" vertex
  183. func (g *Graph) deleteEdges_locked(fromType, toType vertexType, toNamespace, toName string) {
  184. // get the "to" side
  185. toVert, exists := g.getVertex_rlocked(toType, toNamespace, toName)
  186. if !exists {
  187. return
  188. }
  189. // delete all edges between vertices of fromType and toVert
  190. neighborsToRemove := []*namedVertex{}
  191. edgesToRemove := []graph.Edge{}
  192. g.graph.VisitTo(toVert, func(from graph.Node) bool {
  193. fromVert := from.(*namedVertex)
  194. if fromVert.vertexType != fromType {
  195. return true
  196. }
  197. // this neighbor has only one edge (which must be to us), so remove them as well
  198. if g.graph.Degree(fromVert) == 1 {
  199. neighborsToRemove = append(neighborsToRemove, fromVert)
  200. } else {
  201. edgesToRemove = append(edgesToRemove, g.graph.EdgeBetween(from, toVert))
  202. }
  203. return true
  204. })
  205. // clean up orphaned verts
  206. for _, v := range neighborsToRemove {
  207. g.removeVertex_locked(v)
  208. }
  209. // remove edges and decrement destination indexes for neighbors that dropped outbound edges
  210. for _, edge := range edgesToRemove {
  211. g.graph.RemoveEdge(edge)
  212. g.removeEdgeFromDestinationIndex_locked(edge)
  213. }
  214. }
  215. // A fastpath for recomputeDestinationIndex_locked for "removing edge" case.
  216. func (g *Graph) removeEdgeFromDestinationIndex_locked(e graph.Edge) {
  217. n := e.From()
  218. // don't maintain indices for nodes with few edges
  219. edgeCount := g.graph.Degree(n)
  220. if edgeCount < g.destinationEdgeThreshold {
  221. delete(g.destinationEdgeIndex, n.ID())
  222. return
  223. }
  224. // decrement the nodeID->destinationID refcount in the index, if the index exists
  225. index := g.destinationEdgeIndex[n.ID()]
  226. if index == nil {
  227. return
  228. }
  229. if destinationEdge, ok := e.(*destinationEdge); ok {
  230. index.decrement(destinationEdge.DestinationID())
  231. }
  232. }
  233. // A fastpath for recomputeDestinationIndex_locked for "adding edge case".
  234. func (g *Graph) addEdgeToDestinationIndex_locked(e graph.Edge) {
  235. n := e.From()
  236. index := g.destinationEdgeIndex[n.ID()]
  237. if index == nil {
  238. // There is no index, use the full index computation method
  239. g.recomputeDestinationIndex_locked(n)
  240. return
  241. }
  242. // fast-add the new edge to an existing index
  243. if destinationEdge, ok := e.(*destinationEdge); ok {
  244. index.increment(destinationEdge.DestinationID())
  245. }
  246. }
  247. // must be called under write lock
  248. // removeVertex_locked removes the specified vertex from the graph and from the maintained indices.
  249. // It does nothing to indexes of neighbor vertices.
  250. func (g *Graph) removeVertex_locked(v *namedVertex) {
  251. g.graph.RemoveNode(v)
  252. delete(g.destinationEdgeIndex, v.ID())
  253. delete(g.vertices[v.vertexType][v.namespace], v.name)
  254. if len(g.vertices[v.vertexType][v.namespace]) == 0 {
  255. delete(g.vertices[v.vertexType], v.namespace)
  256. }
  257. }
  258. // must be called under write lock
  259. // recomputeDestinationIndex_locked recomputes the index of destination ids for the specified vertex
  260. func (g *Graph) recomputeDestinationIndex_locked(n graph.Node) {
  261. // don't maintain indices for nodes with few edges
  262. edgeCount := g.graph.Degree(n)
  263. if edgeCount < g.destinationEdgeThreshold {
  264. delete(g.destinationEdgeIndex, n.ID())
  265. return
  266. }
  267. // get or create the index
  268. index := g.destinationEdgeIndex[n.ID()]
  269. if index == nil {
  270. index = newIntSet()
  271. } else {
  272. index.reset()
  273. }
  274. // populate the index
  275. g.graph.VisitFrom(n, func(dest graph.Node) bool {
  276. if destinationEdge, ok := g.graph.EdgeBetween(n, dest).(*destinationEdge); ok {
  277. index.increment(destinationEdge.DestinationID())
  278. }
  279. return true
  280. })
  281. g.destinationEdgeIndex[n.ID()] = index
  282. }
  283. // AddPod should only be called once spec.NodeName is populated.
  284. // It sets up edges for the following relationships (which are immutable for a pod once bound to a node):
  285. //
  286. // pod -> node
  287. //
  288. // secret -> pod
  289. // configmap -> pod
  290. // pvc -> pod
  291. // svcacct -> pod
  292. func (g *Graph) AddPod(pod *corev1.Pod) {
  293. g.lock.Lock()
  294. defer g.lock.Unlock()
  295. g.deleteVertex_locked(podVertexType, pod.Namespace, pod.Name)
  296. podVertex := g.getOrCreateVertex_locked(podVertexType, pod.Namespace, pod.Name)
  297. nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", pod.Spec.NodeName)
  298. g.graph.SetEdge(newDestinationEdge(podVertex, nodeVertex, nodeVertex))
  299. // Short-circuit adding edges to other resources for mirror pods.
  300. // A node must never be able to create a pod that grants them permissions on other API objects.
  301. // The NodeRestriction admission plugin prevents creation of such pods, but short-circuiting here gives us defense in depth.
  302. if _, isMirrorPod := pod.Annotations[corev1.MirrorPodAnnotationKey]; isMirrorPod {
  303. return
  304. }
  305. // TODO(mikedanese): If the pod doesn't mount the service account secrets,
  306. // should the node still get access to the service account?
  307. //
  308. // ref https://github.com/kubernetes/kubernetes/issues/58790
  309. if len(pod.Spec.ServiceAccountName) > 0 {
  310. serviceAccountVertex := g.getOrCreateVertex_locked(serviceAccountVertexType, pod.Namespace, pod.Spec.ServiceAccountName)
  311. e := newDestinationEdge(serviceAccountVertex, podVertex, nodeVertex)
  312. g.graph.SetEdge(e)
  313. g.addEdgeToDestinationIndex_locked(e)
  314. }
  315. podutil.VisitPodSecretNames(pod, func(secret string) bool {
  316. secretVertex := g.getOrCreateVertex_locked(secretVertexType, pod.Namespace, secret)
  317. e := newDestinationEdge(secretVertex, podVertex, nodeVertex)
  318. g.graph.SetEdge(e)
  319. g.addEdgeToDestinationIndex_locked(e)
  320. return true
  321. })
  322. podutil.VisitPodConfigmapNames(pod, func(configmap string) bool {
  323. configmapVertex := g.getOrCreateVertex_locked(configMapVertexType, pod.Namespace, configmap)
  324. e := newDestinationEdge(configmapVertex, podVertex, nodeVertex)
  325. g.graph.SetEdge(e)
  326. g.addEdgeToDestinationIndex_locked(e)
  327. return true
  328. })
  329. for _, v := range pod.Spec.Volumes {
  330. if v.PersistentVolumeClaim != nil {
  331. pvcVertex := g.getOrCreateVertex_locked(pvcVertexType, pod.Namespace, v.PersistentVolumeClaim.ClaimName)
  332. e := newDestinationEdge(pvcVertex, podVertex, nodeVertex)
  333. g.graph.SetEdge(e)
  334. g.addEdgeToDestinationIndex_locked(e)
  335. }
  336. }
  337. }
  338. func (g *Graph) DeletePod(name, namespace string) {
  339. g.lock.Lock()
  340. defer g.lock.Unlock()
  341. g.deleteVertex_locked(podVertexType, namespace, name)
  342. }
  343. // AddPV sets up edges for the following relationships:
  344. //
  345. // secret -> pv
  346. //
  347. // pv -> pvc
  348. func (g *Graph) AddPV(pv *corev1.PersistentVolume) {
  349. g.lock.Lock()
  350. defer g.lock.Unlock()
  351. // clear existing edges
  352. g.deleteVertex_locked(pvVertexType, "", pv.Name)
  353. // if we have a pvc, establish new edges
  354. if pv.Spec.ClaimRef != nil {
  355. pvVertex := g.getOrCreateVertex_locked(pvVertexType, "", pv.Name)
  356. // since we don't know the other end of the pvc -> pod -> node chain (or it may not even exist yet), we can't decorate these edges with kubernetes node info
  357. g.graph.SetEdge(simple.Edge{F: pvVertex, T: g.getOrCreateVertex_locked(pvcVertexType, pv.Spec.ClaimRef.Namespace, pv.Spec.ClaimRef.Name)})
  358. pvutil.VisitPVSecretNames(pv, func(namespace, secret string, kubeletVisible bool) bool {
  359. // This grants access to the named secret in the same namespace as the bound PVC
  360. if kubeletVisible {
  361. g.graph.SetEdge(simple.Edge{F: g.getOrCreateVertex_locked(secretVertexType, namespace, secret), T: pvVertex})
  362. }
  363. return true
  364. })
  365. }
  366. }
  367. func (g *Graph) DeletePV(name string) {
  368. g.lock.Lock()
  369. defer g.lock.Unlock()
  370. g.deleteVertex_locked(pvVertexType, "", name)
  371. }
  372. // AddVolumeAttachment sets up edges for the following relationships:
  373. //
  374. // volume attachment -> node
  375. func (g *Graph) AddVolumeAttachment(attachmentName, nodeName string) {
  376. g.lock.Lock()
  377. defer g.lock.Unlock()
  378. // clear existing edges
  379. g.deleteVertex_locked(vaVertexType, "", attachmentName)
  380. // if we have a node, establish new edges
  381. if len(nodeName) > 0 {
  382. vaVertex := g.getOrCreateVertex_locked(vaVertexType, "", attachmentName)
  383. nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", nodeName)
  384. g.graph.SetEdge(newDestinationEdge(vaVertex, nodeVertex, nodeVertex))
  385. }
  386. }
  387. func (g *Graph) DeleteVolumeAttachment(name string) {
  388. g.lock.Lock()
  389. defer g.lock.Unlock()
  390. g.deleteVertex_locked(vaVertexType, "", name)
  391. }
  392. // SetNodeConfigMap sets up edges for the Node.Spec.ConfigSource.ConfigMap relationship:
  393. //
  394. // configmap -> node
  395. func (g *Graph) SetNodeConfigMap(nodeName, configMapName, configMapNamespace string) {
  396. g.lock.Lock()
  397. defer g.lock.Unlock()
  398. // TODO(mtaufen): ensure len(nodeName) > 0 in all cases (would sure be nice to have a dependently-typed language here...)
  399. // clear edges configmaps -> node where the destination is the current node *only*
  400. // at present, a node can only have one *direct* configmap reference at a time
  401. g.deleteEdges_locked(configMapVertexType, nodeVertexType, "", nodeName)
  402. // establish new edges if we have a real ConfigMap to reference
  403. if len(configMapName) > 0 && len(configMapNamespace) > 0 {
  404. configmapVertex := g.getOrCreateVertex_locked(configMapVertexType, configMapNamespace, configMapName)
  405. nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", nodeName)
  406. e := newDestinationEdge(configmapVertex, nodeVertex, nodeVertex)
  407. g.graph.SetEdge(e)
  408. g.addEdgeToDestinationIndex_locked(e)
  409. }
  410. }