123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- /*
- Copyright 2017 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package node
- import (
- "sync"
- corev1 "k8s.io/api/core/v1"
- pvutil "k8s.io/kubernetes/pkg/api/v1/persistentvolume"
- podutil "k8s.io/kubernetes/pkg/api/v1/pod"
- "k8s.io/kubernetes/third_party/forked/gonum/graph"
- "k8s.io/kubernetes/third_party/forked/gonum/graph/simple"
- )
- // namedVertex implements graph.Node and remembers the type, namespace, and name of its related API object
- type namedVertex struct {
- name string
- namespace string
- id int
- vertexType vertexType
- }
- func newNamedVertex(vertexType vertexType, namespace, name string, id int) *namedVertex {
- return &namedVertex{
- vertexType: vertexType,
- name: name,
- namespace: namespace,
- id: id,
- }
- }
- func (n *namedVertex) ID() int {
- return n.id
- }
- func (n *namedVertex) String() string {
- if len(n.namespace) == 0 {
- return vertexTypes[n.vertexType] + ":" + n.name
- }
- return vertexTypes[n.vertexType] + ":" + n.namespace + "/" + n.name
- }
- // destinationEdge is a graph edge that includes a denormalized reference to the final destination vertex.
- // This should only be used when there is a single leaf vertex reachable from T.
- type destinationEdge struct {
- F graph.Node
- T graph.Node
- Destination graph.Node
- }
- func newDestinationEdge(from, to, destination graph.Node) graph.Edge {
- return &destinationEdge{F: from, T: to, Destination: destination}
- }
- func (e *destinationEdge) From() graph.Node { return e.F }
- func (e *destinationEdge) To() graph.Node { return e.T }
- func (e *destinationEdge) Weight() float64 { return 0 }
- func (e *destinationEdge) DestinationID() int { return e.Destination.ID() }
- // Graph holds graph vertices and a way to look up a vertex for a particular API type/namespace/name.
- // All edges point toward the vertices representing Kubernetes nodes:
- //
- // node <- pod
- // pod <- secret,configmap,pvc
- // pvc <- pv
- // pv <- secret
- type Graph struct {
- lock sync.RWMutex
- graph *simple.DirectedAcyclicGraph
- // vertices is a map of type -> namespace -> name -> vertex
- vertices map[vertexType]namespaceVertexMapping
- // destinationEdgeIndex is a map of vertex -> set of destination IDs
- destinationEdgeIndex map[int]*intSet
- // destinationEdgeThreshold is the minimum number of distinct destination IDs at which to maintain an index
- destinationEdgeThreshold int
- }
- // namespaceVertexMapping is a map of namespace -> name -> vertex
- type namespaceVertexMapping map[string]nameVertexMapping
- // nameVertexMapping is a map of name -> vertex
- type nameVertexMapping map[string]*namedVertex
- func NewGraph() *Graph {
- return &Graph{
- vertices: map[vertexType]namespaceVertexMapping{},
- graph: simple.NewDirectedAcyclicGraph(0, 0),
- destinationEdgeIndex: map[int]*intSet{},
- // experimentally determined to be the point at which iteration adds an order of magnitude to the authz check.
- // since maintaining indexes costs time/memory while processing graph changes, we don't want to make this too low.
- destinationEdgeThreshold: 200,
- }
- }
- // vertexType indicates the type of the API object the vertex represents.
- // represented as a byte to minimize space used in the vertices.
- type vertexType byte
- const (
- configMapVertexType vertexType = iota
- nodeVertexType
- podVertexType
- pvcVertexType
- pvVertexType
- secretVertexType
- vaVertexType
- serviceAccountVertexType
- )
- var vertexTypes = map[vertexType]string{
- configMapVertexType: "configmap",
- nodeVertexType: "node",
- podVertexType: "pod",
- pvcVertexType: "pvc",
- pvVertexType: "pv",
- secretVertexType: "secret",
- vaVertexType: "volumeattachment",
- serviceAccountVertexType: "serviceAccount",
- }
- // must be called under a write lock
- func (g *Graph) getOrCreateVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
- if vertex, exists := g.getVertex_rlocked(vertexType, namespace, name); exists {
- return vertex
- }
- return g.createVertex_locked(vertexType, namespace, name)
- }
- // must be called under a read lock
- func (g *Graph) getVertex_rlocked(vertexType vertexType, namespace, name string) (*namedVertex, bool) {
- vertex, exists := g.vertices[vertexType][namespace][name]
- return vertex, exists
- }
- // must be called under a write lock
- func (g *Graph) createVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
- typedVertices, exists := g.vertices[vertexType]
- if !exists {
- typedVertices = namespaceVertexMapping{}
- g.vertices[vertexType] = typedVertices
- }
- namespacedVertices, exists := typedVertices[namespace]
- if !exists {
- namespacedVertices = map[string]*namedVertex{}
- typedVertices[namespace] = namespacedVertices
- }
- vertex := newNamedVertex(vertexType, namespace, name, g.graph.NewNodeID())
- namespacedVertices[name] = vertex
- g.graph.AddNode(vertex)
- return vertex
- }
- // must be called under write lock
- func (g *Graph) deleteVertex_locked(vertexType vertexType, namespace, name string) {
- vertex, exists := g.getVertex_rlocked(vertexType, namespace, name)
- if !exists {
- return
- }
- // find existing neighbors with a single edge (meaning we are their only neighbor)
- neighborsToRemove := []graph.Node{}
- edgesToRemoveFromIndexes := []graph.Edge{}
- g.graph.VisitFrom(vertex, func(neighbor graph.Node) bool {
- // this downstream neighbor has only one edge (which must be from us), so remove them as well
- if g.graph.Degree(neighbor) == 1 {
- neighborsToRemove = append(neighborsToRemove, neighbor)
- }
- return true
- })
- g.graph.VisitTo(vertex, func(neighbor graph.Node) bool {
- if g.graph.Degree(neighbor) == 1 {
- // this upstream neighbor has only one edge (which must be to us), so remove them as well
- neighborsToRemove = append(neighborsToRemove, neighbor)
- } else {
- // decrement the destination edge index on this neighbor if the edge between us was a destination edge
- edgesToRemoveFromIndexes = append(edgesToRemoveFromIndexes, g.graph.EdgeBetween(vertex, neighbor))
- }
- return true
- })
- // remove the vertex
- g.removeVertex_locked(vertex)
- // remove neighbors that are now edgeless
- for _, neighbor := range neighborsToRemove {
- g.removeVertex_locked(neighbor.(*namedVertex))
- }
- // remove edges from destination indexes for neighbors that dropped outbound edges
- for _, edge := range edgesToRemoveFromIndexes {
- g.removeEdgeFromDestinationIndex_locked(edge)
- }
- }
- // must be called under write lock
- // deletes edges from a given vertex type to a specific vertex
- // will delete each orphaned "from" vertex, but will never delete the "to" vertex
- func (g *Graph) deleteEdges_locked(fromType, toType vertexType, toNamespace, toName string) {
- // get the "to" side
- toVert, exists := g.getVertex_rlocked(toType, toNamespace, toName)
- if !exists {
- return
- }
- // delete all edges between vertices of fromType and toVert
- neighborsToRemove := []*namedVertex{}
- edgesToRemove := []graph.Edge{}
- g.graph.VisitTo(toVert, func(from graph.Node) bool {
- fromVert := from.(*namedVertex)
- if fromVert.vertexType != fromType {
- return true
- }
- // this neighbor has only one edge (which must be to us), so remove them as well
- if g.graph.Degree(fromVert) == 1 {
- neighborsToRemove = append(neighborsToRemove, fromVert)
- } else {
- edgesToRemove = append(edgesToRemove, g.graph.EdgeBetween(from, toVert))
- }
- return true
- })
- // clean up orphaned verts
- for _, v := range neighborsToRemove {
- g.removeVertex_locked(v)
- }
- // remove edges and decrement destination indexes for neighbors that dropped outbound edges
- for _, edge := range edgesToRemove {
- g.graph.RemoveEdge(edge)
- g.removeEdgeFromDestinationIndex_locked(edge)
- }
- }
- // A fastpath for recomputeDestinationIndex_locked for "removing edge" case.
- func (g *Graph) removeEdgeFromDestinationIndex_locked(e graph.Edge) {
- n := e.From()
- // don't maintain indices for nodes with few edges
- edgeCount := g.graph.Degree(n)
- if edgeCount < g.destinationEdgeThreshold {
- delete(g.destinationEdgeIndex, n.ID())
- return
- }
- // decrement the nodeID->destinationID refcount in the index, if the index exists
- index := g.destinationEdgeIndex[n.ID()]
- if index == nil {
- return
- }
- if destinationEdge, ok := e.(*destinationEdge); ok {
- index.decrement(destinationEdge.DestinationID())
- }
- }
- // A fastpath for recomputeDestinationIndex_locked for "adding edge case".
- func (g *Graph) addEdgeToDestinationIndex_locked(e graph.Edge) {
- n := e.From()
- index := g.destinationEdgeIndex[n.ID()]
- if index == nil {
- // There is no index, use the full index computation method
- g.recomputeDestinationIndex_locked(n)
- return
- }
- // fast-add the new edge to an existing index
- if destinationEdge, ok := e.(*destinationEdge); ok {
- index.increment(destinationEdge.DestinationID())
- }
- }
- // must be called under write lock
- // removeVertex_locked removes the specified vertex from the graph and from the maintained indices.
- // It does nothing to indexes of neighbor vertices.
- func (g *Graph) removeVertex_locked(v *namedVertex) {
- g.graph.RemoveNode(v)
- delete(g.destinationEdgeIndex, v.ID())
- delete(g.vertices[v.vertexType][v.namespace], v.name)
- if len(g.vertices[v.vertexType][v.namespace]) == 0 {
- delete(g.vertices[v.vertexType], v.namespace)
- }
- }
- // must be called under write lock
- // recomputeDestinationIndex_locked recomputes the index of destination ids for the specified vertex
- func (g *Graph) recomputeDestinationIndex_locked(n graph.Node) {
- // don't maintain indices for nodes with few edges
- edgeCount := g.graph.Degree(n)
- if edgeCount < g.destinationEdgeThreshold {
- delete(g.destinationEdgeIndex, n.ID())
- return
- }
- // get or create the index
- index := g.destinationEdgeIndex[n.ID()]
- if index == nil {
- index = newIntSet()
- } else {
- index.reset()
- }
- // populate the index
- g.graph.VisitFrom(n, func(dest graph.Node) bool {
- if destinationEdge, ok := g.graph.EdgeBetween(n, dest).(*destinationEdge); ok {
- index.increment(destinationEdge.DestinationID())
- }
- return true
- })
- g.destinationEdgeIndex[n.ID()] = index
- }
- // AddPod should only be called once spec.NodeName is populated.
- // It sets up edges for the following relationships (which are immutable for a pod once bound to a node):
- //
- // pod -> node
- //
- // secret -> pod
- // configmap -> pod
- // pvc -> pod
- // svcacct -> pod
- func (g *Graph) AddPod(pod *corev1.Pod) {
- g.lock.Lock()
- defer g.lock.Unlock()
- g.deleteVertex_locked(podVertexType, pod.Namespace, pod.Name)
- podVertex := g.getOrCreateVertex_locked(podVertexType, pod.Namespace, pod.Name)
- nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", pod.Spec.NodeName)
- g.graph.SetEdge(newDestinationEdge(podVertex, nodeVertex, nodeVertex))
- // Short-circuit adding edges to other resources for mirror pods.
- // A node must never be able to create a pod that grants them permissions on other API objects.
- // The NodeRestriction admission plugin prevents creation of such pods, but short-circuiting here gives us defense in depth.
- if _, isMirrorPod := pod.Annotations[corev1.MirrorPodAnnotationKey]; isMirrorPod {
- return
- }
- // TODO(mikedanese): If the pod doesn't mount the service account secrets,
- // should the node still get access to the service account?
- //
- // ref https://github.com/kubernetes/kubernetes/issues/58790
- if len(pod.Spec.ServiceAccountName) > 0 {
- serviceAccountVertex := g.getOrCreateVertex_locked(serviceAccountVertexType, pod.Namespace, pod.Spec.ServiceAccountName)
- e := newDestinationEdge(serviceAccountVertex, podVertex, nodeVertex)
- g.graph.SetEdge(e)
- g.addEdgeToDestinationIndex_locked(e)
- }
- podutil.VisitPodSecretNames(pod, func(secret string) bool {
- secretVertex := g.getOrCreateVertex_locked(secretVertexType, pod.Namespace, secret)
- e := newDestinationEdge(secretVertex, podVertex, nodeVertex)
- g.graph.SetEdge(e)
- g.addEdgeToDestinationIndex_locked(e)
- return true
- })
- podutil.VisitPodConfigmapNames(pod, func(configmap string) bool {
- configmapVertex := g.getOrCreateVertex_locked(configMapVertexType, pod.Namespace, configmap)
- e := newDestinationEdge(configmapVertex, podVertex, nodeVertex)
- g.graph.SetEdge(e)
- g.addEdgeToDestinationIndex_locked(e)
- return true
- })
- for _, v := range pod.Spec.Volumes {
- if v.PersistentVolumeClaim != nil {
- pvcVertex := g.getOrCreateVertex_locked(pvcVertexType, pod.Namespace, v.PersistentVolumeClaim.ClaimName)
- e := newDestinationEdge(pvcVertex, podVertex, nodeVertex)
- g.graph.SetEdge(e)
- g.addEdgeToDestinationIndex_locked(e)
- }
- }
- }
- func (g *Graph) DeletePod(name, namespace string) {
- g.lock.Lock()
- defer g.lock.Unlock()
- g.deleteVertex_locked(podVertexType, namespace, name)
- }
- // AddPV sets up edges for the following relationships:
- //
- // secret -> pv
- //
- // pv -> pvc
- func (g *Graph) AddPV(pv *corev1.PersistentVolume) {
- g.lock.Lock()
- defer g.lock.Unlock()
- // clear existing edges
- g.deleteVertex_locked(pvVertexType, "", pv.Name)
- // if we have a pvc, establish new edges
- if pv.Spec.ClaimRef != nil {
- pvVertex := g.getOrCreateVertex_locked(pvVertexType, "", pv.Name)
- // since we don't know the other end of the pvc -> pod -> node chain (or it may not even exist yet), we can't decorate these edges with kubernetes node info
- g.graph.SetEdge(simple.Edge{F: pvVertex, T: g.getOrCreateVertex_locked(pvcVertexType, pv.Spec.ClaimRef.Namespace, pv.Spec.ClaimRef.Name)})
- pvutil.VisitPVSecretNames(pv, func(namespace, secret string, kubeletVisible bool) bool {
- // This grants access to the named secret in the same namespace as the bound PVC
- if kubeletVisible {
- g.graph.SetEdge(simple.Edge{F: g.getOrCreateVertex_locked(secretVertexType, namespace, secret), T: pvVertex})
- }
- return true
- })
- }
- }
- func (g *Graph) DeletePV(name string) {
- g.lock.Lock()
- defer g.lock.Unlock()
- g.deleteVertex_locked(pvVertexType, "", name)
- }
- // AddVolumeAttachment sets up edges for the following relationships:
- //
- // volume attachment -> node
- func (g *Graph) AddVolumeAttachment(attachmentName, nodeName string) {
- g.lock.Lock()
- defer g.lock.Unlock()
- // clear existing edges
- g.deleteVertex_locked(vaVertexType, "", attachmentName)
- // if we have a node, establish new edges
- if len(nodeName) > 0 {
- vaVertex := g.getOrCreateVertex_locked(vaVertexType, "", attachmentName)
- nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", nodeName)
- g.graph.SetEdge(newDestinationEdge(vaVertex, nodeVertex, nodeVertex))
- }
- }
- func (g *Graph) DeleteVolumeAttachment(name string) {
- g.lock.Lock()
- defer g.lock.Unlock()
- g.deleteVertex_locked(vaVertexType, "", name)
- }
- // SetNodeConfigMap sets up edges for the Node.Spec.ConfigSource.ConfigMap relationship:
- //
- // configmap -> node
- func (g *Graph) SetNodeConfigMap(nodeName, configMapName, configMapNamespace string) {
- g.lock.Lock()
- defer g.lock.Unlock()
- // TODO(mtaufen): ensure len(nodeName) > 0 in all cases (would sure be nice to have a dependently-typed language here...)
- // clear edges configmaps -> node where the destination is the current node *only*
- // at present, a node can only have one *direct* configmap reference at a time
- g.deleteEdges_locked(configMapVertexType, nodeVertexType, "", nodeName)
- // establish new edges if we have a real ConfigMap to reference
- if len(configMapName) > 0 && len(configMapNamespace) > 0 {
- configmapVertex := g.getOrCreateVertex_locked(configMapVertexType, configMapNamespace, configMapName)
- nodeVertex := g.getOrCreateVertex_locked(nodeVertexType, "", nodeName)
- e := newDestinationEdge(configmapVertex, nodeVertex, nodeVertex)
- g.graph.SetEdge(e)
- g.addEdgeToDestinationIndex_locked(e)
- }
- }
|