volume_restrictions.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volumerestrictions

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
	"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// VolumeRestrictions is a plugin that checks volume restrictions.
type VolumeRestrictions struct{}

var _ framework.FilterPlugin = &VolumeRestrictions{}

// Name is the name of the plugin used in the plugin registry and configurations.
const Name = "VolumeRestrictions"

const (
	// ErrReasonDiskConflict is used for NoDiskConflict predicate error.
	ErrReasonDiskConflict = "node(s) had no available disk"
)

// Name returns the name of the plugin. It is used in logs, etc.
func (pl *VolumeRestrictions) Name() string {
	return Name
}
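
// isVolumeConflict reports whether the given volume conflicts with any volume
// already declared by the given pod.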
func isVolumeConflict(volume v1.Volume, pod *v1.Pod) bool {
	// Fast path if there are no conflict-checking targets.
	if volume.GCEPersistentDisk == nil && volume.AWSElasticBlockStore == nil && volume.RBD == nil && volume.ISCSI == nil {
		return false
	}

	for _, existingVolume := range pod.Spec.Volumes {
		// The same GCE disk mounted by multiple pods conflicts unless all pods mount it read-only.
		if volume.GCEPersistentDisk != nil && existingVolume.GCEPersistentDisk != nil {
			disk, existingDisk := volume.GCEPersistentDisk, existingVolume.GCEPersistentDisk
			if disk.PDName == existingDisk.PDName && !(disk.ReadOnly && existingDisk.ReadOnly) {
				return true
			}
		}

		if volume.AWSElasticBlockStore != nil && existingVolume.AWSElasticBlockStore != nil {
			if volume.AWSElasticBlockStore.VolumeID == existingVolume.AWSElasticBlockStore.VolumeID {
				return true
			}
		}

		if volume.ISCSI != nil && existingVolume.ISCSI != nil {
			iqn := volume.ISCSI.IQN
			eiqn := existingVolume.ISCSI.IQN
			// Two iSCSI volumes are the same if they share the same IQN. As iSCSI volumes are
			// RWO or ROX, only one read-write mount is permitted. The same iSCSI volume mounted
			// by multiple pods conflicts unless all pods mount it read-only.
			if iqn == eiqn && !(volume.ISCSI.ReadOnly && existingVolume.ISCSI.ReadOnly) {
				return true
			}
		}

		if volume.RBD != nil && existingVolume.RBD != nil {
			mon, pool, image := volume.RBD.CephMonitors, volume.RBD.RBDPool, volume.RBD.RBDImage
			emon, epool, eimage := existingVolume.RBD.CephMonitors, existingVolume.RBD.RBDPool, existingVolume.RBD.RBDImage
			// Two RBD images are the same if they share at least one Ceph monitor, are in the same
			// RADOS pool, and have the same image name. Only one read-write mount is permitted for
			// the same RBD image; the same image mounted by multiple pods conflicts unless all pods
			// mount it read-only.
			if haveOverlap(mon, emon) && pool == epool && image == eimage && !(volume.RBD.ReadOnly && existingVolume.RBD.ReadOnly) {
				return true
			}
		}
	}

	return false
}
// haveOverlap searches two slices and returns true if they have at least one common element;
// it returns false otherwise.
func haveOverlap(a1, a2 []string) bool {
	if len(a1) > len(a2) {
		a1, a2 = a2, a1
	}
	m := map[string]bool{}
	for _, val := range a1 {
		m[val] = true
	}
	for _, val := range a2 {
		if _, ok := m[val]; ok {
			return true
		}
	}
	return false
}
// Filter is invoked at the filter extension point.
// It evaluates whether a pod can fit based on the volumes it requests and the volumes already
// mounted by pods on the node. If a requested volume is already mounted on the node in a
// conflicting way, the pod cannot be scheduled there.
// This is GCE PD, Amazon EBS, iSCSI and Ceph RBD specific for now:
//   - GCE PD allows multiple mounts of the same disk as long as they are all read-only.
//   - AWS EBS forbids any two pods mounting the same volume ID.
//   - Ceph RBD forbids two pods sharing at least one monitor plus the same pool and image,
//     unless both mount the image read-only.
//   - iSCSI forbids two pods sharing the same IQN, unless both mount the volume read-only.
func (pl *VolumeRestrictions) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return framework.NewStatus(framework.Unschedulable, ErrReasonDiskConflict)
			}
		}
	}
	return nil
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
	return &VolumeRestrictions{}, nil
}
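
To see the filter in action, here is a minimal test-style sketch, not part of the file above, that would sit alongside it in the same volumerestrictions package. It assumes a node that already runs a pod mounting a GCE PD named "foo" read-write, and checks that a second read-write pod for the same disk is rejected while an unrelated pod passes. The test name and the pod objects are made up for illustration; only the plugin's Filter method and nodeinfo.NewNodeInfo come from the surrounding code base.

package volumerestrictions

import (
	"context"
	"testing"

	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// TestGCEPDConflict is a hypothetical example exercising the read-write conflict path.
func TestGCEPDConflict(t *testing.T) {
	// Two pods mounting the same GCE PD; ReadOnly defaults to false, so both are read-write.
	pdSpec := v1.PodSpec{
		Volumes: []v1.Volume{{
			VolumeSource: v1.VolumeSource{
				GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: "foo"},
			},
		}},
	}
	existingPod := &v1.Pod{Spec: pdSpec}
	incomingPod := &v1.Pod{Spec: pdSpec}

	pl := &VolumeRestrictions{}
	// The node already hosts existingPod.
	ni := nodeinfo.NewNodeInfo(existingPod)

	// A second read-write mount of the same disk should be rejected.
	if status := pl.Filter(context.Background(), nil, incomingPod, ni); status.IsSuccess() {
		t.Errorf("expected an Unschedulable status for a conflicting GCE PD, got success")
	}

	// A pod with no conflicting volumes should pass (Filter returns nil, i.e. success).
	if status := pl.Filter(context.Background(), nil, &v1.Pod{}, ni); !status.IsSuccess() {
		t.Errorf("expected success for a pod without conflicting volumes, got %v", status)
	}
}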