gce_util.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. // +build !providerless
  2. /*
  3. Copyright 2014 The Kubernetes Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package gcepd
  15. import (
  16. "fmt"
  17. "path/filepath"
  18. "regexp"
  19. "strings"
  20. "time"
  21. "k8s.io/klog"
  22. "k8s.io/utils/exec"
  23. "k8s.io/utils/mount"
  24. utilpath "k8s.io/utils/path"
  25. v1 "k8s.io/api/core/v1"
  26. "k8s.io/apimachinery/pkg/util/sets"
  27. cloudprovider "k8s.io/cloud-provider"
  28. cloudvolume "k8s.io/cloud-provider/volume"
  29. volumehelpers "k8s.io/cloud-provider/volume/helpers"
  30. "k8s.io/kubernetes/pkg/volume"
  31. volumeutil "k8s.io/kubernetes/pkg/volume/util"
  32. gcecloud "k8s.io/legacy-cloud-providers/gce"
  33. )
  // Device-path conventions used to locate an attached GCE PD on the node.
  const (
  	diskByIDPath         = "/dev/disk/by-id/"
  	diskGooglePrefix     = "google-"
  	diskScsiGooglePrefix = "scsi-0Google_PersistentDisk_"
  	diskPartitionSuffix  = "-part"
  	diskSDPath           = "/dev/sd"
  	diskSDPattern        = "/dev/sd*"
  )

  // Retry, polling and zone-count limits.
  const (
  	maxRetries         = 10
  	checkSleepDuration = time.Second
  	maxRegionalPDZones = 2
  )

  // Supported values of the "replication-type" parameter.
  // Replication type constants must be lower case.
  const (
  	replicationTypeNone       = "none"
  	replicationTypeRegionalPD = "regional-pd"
  )

  // scsiPattern describes the expected scsi_id output, which should be in the
  // form of:
  //
  //	0Google PersistentDisk <disk name>
  //
  // The single capture group holds the disk name.
  const scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$`
  51. var (
  52. // errorSleepDuration is modified only in unit tests and should be constant
  53. // otherwise.
  54. errorSleepDuration = 5 * time.Second
  55. // regex to parse scsi_id output and extract the serial
  56. scsiRegex = regexp.MustCompile(scsiPattern)
  57. )
  // GCEDiskUtil groups the create and delete operations for GCE PDs.
  // The type has no fields, so its zero value is ready to use.
  type GCEDiskUtil struct{}
  60. // DeleteVolume deletes a GCE PD
  61. // Returns: error
  62. func (util *GCEDiskUtil) DeleteVolume(d *gcePersistentDiskDeleter) error {
  63. cloud, err := getCloudProvider(d.gcePersistentDisk.plugin.host.GetCloudProvider())
  64. if err != nil {
  65. return err
  66. }
  67. if err = cloud.DeleteDisk(d.pdName); err != nil {
  68. klog.V(2).Infof("Error deleting GCE PD volume %s: %v", d.pdName, err)
  69. // GCE cloud provider returns volume.deletedVolumeInUseError when
  70. // necessary, no handling needed here.
  71. return err
  72. }
  73. klog.V(2).Infof("Successfully deleted GCE PD volume %s", d.pdName)
  74. return nil
  75. }
  76. // CreateVolume creates a GCE PD.
  77. // Returns: gcePDName, volumeSizeGB, labels, fsType, error
  78. func (util *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner, node *v1.Node, allowedTopologies []v1.TopologySelectorTerm) (string, int, map[string]string, string, error) {
  79. cloud, err := getCloudProvider(c.gcePersistentDisk.plugin.host.GetCloudProvider())
  80. if err != nil {
  81. return "", 0, nil, "", err
  82. }
  83. name := volumeutil.GenerateVolumeName(c.options.ClusterName, c.options.PVName, 63) // GCE PD name can have up to 63 characters
  84. capacity := c.options.PVC.Spec.Resources.Requests[v1.ResourceName(v1.ResourceStorage)]
  85. // GCE PDs are allocated in chunks of GiBs
  86. requestGB := volumehelpers.RoundUpToGiB(capacity)
  87. // Apply Parameters.
  88. // Values for parameter "replication-type" are canonicalized to lower case.
  89. // Values for other parameters are case-insensitive, and we leave validation of these values
  90. // to the cloud provider.
  91. diskType := ""
  92. configuredZone := ""
  93. var configuredZones sets.String
  94. zonePresent := false
  95. zonesPresent := false
  96. replicationType := replicationTypeNone
  97. fstype := ""
  98. for k, v := range c.options.Parameters {
  99. switch strings.ToLower(k) {
  100. case "type":
  101. diskType = v
  102. case "zone":
  103. zonePresent = true
  104. configuredZone = v
  105. case "zones":
  106. zonesPresent = true
  107. configuredZones, err = volumehelpers.ZonesToSet(v)
  108. if err != nil {
  109. return "", 0, nil, "", err
  110. }
  111. case "replication-type":
  112. replicationType = strings.ToLower(v)
  113. case volume.VolumeParameterFSType:
  114. fstype = v
  115. default:
  116. return "", 0, nil, "", fmt.Errorf("invalid option %q for volume plugin %s", k, c.plugin.GetPluginName())
  117. }
  118. }
  119. // TODO: implement PVC.Selector parsing
  120. if c.options.PVC.Spec.Selector != nil {
  121. return "", 0, nil, "", fmt.Errorf("claim.Spec.Selector is not supported for dynamic provisioning on GCE")
  122. }
  123. var activezones sets.String
  124. activezones, err = cloud.GetAllCurrentZones()
  125. if err != nil {
  126. return "", 0, nil, "", err
  127. }
  128. var disk *gcecloud.Disk
  129. switch replicationType {
  130. case replicationTypeRegionalPD:
  131. selectedZones, err := volumehelpers.SelectZonesForVolume(zonePresent, zonesPresent, configuredZone, configuredZones, activezones, node, allowedTopologies, c.options.PVC.Name, maxRegionalPDZones)
  132. if err != nil {
  133. klog.V(2).Infof("Error selecting zones for regional GCE PD volume: %v", err)
  134. return "", 0, nil, "", err
  135. }
  136. disk, err = cloud.CreateRegionalDisk(
  137. name,
  138. diskType,
  139. selectedZones,
  140. int64(requestGB),
  141. *c.options.CloudTags)
  142. if err != nil {
  143. klog.V(2).Infof("Error creating regional GCE PD volume: %v", err)
  144. return "", 0, nil, "", err
  145. }
  146. klog.V(2).Infof("Successfully created Regional GCE PD volume %s", name)
  147. case replicationTypeNone:
  148. selectedZone, err := volumehelpers.SelectZoneForVolume(zonePresent, zonesPresent, configuredZone, configuredZones, activezones, node, allowedTopologies, c.options.PVC.Name)
  149. if err != nil {
  150. return "", 0, nil, "", err
  151. }
  152. disk, err = cloud.CreateDisk(
  153. name,
  154. diskType,
  155. selectedZone,
  156. int64(requestGB),
  157. *c.options.CloudTags)
  158. if err != nil {
  159. klog.V(2).Infof("Error creating single-zone GCE PD volume: %v", err)
  160. return "", 0, nil, "", err
  161. }
  162. klog.V(2).Infof("Successfully created single-zone GCE PD volume %s", name)
  163. default:
  164. return "", 0, nil, "", fmt.Errorf("replication-type of '%s' is not supported", replicationType)
  165. }
  166. labels, err := cloud.GetAutoLabelsForPD(disk)
  167. if err != nil {
  168. // We don't really want to leak the volume here...
  169. klog.Errorf("error getting labels for volume %q: %v", name, err)
  170. }
  171. return name, int(requestGB), labels, fstype, nil
  172. }
  173. // Returns the first path that exists, or empty string if none exist.
  174. func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String, diskName string) (string, error) {
  175. if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
  176. // It's possible udevadm was called on other disks so it should not block this
  177. // call. If it did fail on this disk, then the devicePath will either
  178. // not exist or be wrong. If it's wrong, then the scsi_id check below will fail.
  179. klog.Errorf("udevadmChangeToNewDrives failed with: %v", err)
  180. }
  181. for _, path := range devicePaths {
  182. if pathExists, err := mount.PathExists(path); err != nil {
  183. return "", fmt.Errorf("Error checking if path exists: %v", err)
  184. } else if pathExists {
  185. // validate that the path actually resolves to the correct disk
  186. serial, err := getScsiSerial(path, diskName)
  187. if err != nil {
  188. return "", fmt.Errorf("failed to get scsi serial %v", err)
  189. }
  190. if serial != diskName {
  191. // The device link is not pointing to the correct device
  192. // Trigger udev on this device to try to fix the link
  193. if udevErr := udevadmChangeToDrive(path); udevErr != nil {
  194. klog.Errorf("udevadmChangeToDrive %q failed with: %v", path, err)
  195. }
  196. // Return error to retry WaitForAttach and verifyDevicePath
  197. return "", fmt.Errorf("scsi_id serial %q for device %q doesn't match disk %q", serial, path, diskName)
  198. }
  199. // The device link is correct
  200. return path, nil
  201. }
  202. }
  203. return "", nil
  204. }
  205. // Calls scsi_id on the given devicePath to get the serial number reported by that device.
  206. func getScsiSerial(devicePath, diskName string) (string, error) {
  207. exists, err := utilpath.Exists(utilpath.CheckFollowSymlink, "/lib/udev/scsi_id")
  208. if err != nil {
  209. return "", fmt.Errorf("failed to check scsi_id existence: %v", err)
  210. }
  211. if !exists {
  212. klog.V(6).Infof("scsi_id doesn't exist; skipping check for %v", devicePath)
  213. return diskName, nil
  214. }
  215. out, err := exec.New().Command(
  216. "/lib/udev/scsi_id",
  217. "--page=0x83",
  218. "--whitelisted",
  219. fmt.Sprintf("--device=%v", devicePath)).CombinedOutput()
  220. if err != nil {
  221. return "", fmt.Errorf("scsi_id failed for device %q with %v", devicePath, err)
  222. }
  223. return parseScsiSerial(string(out))
  224. }
  225. // Parse the output returned by scsi_id and extract the serial number
  226. func parseScsiSerial(output string) (string, error) {
  227. substrings := scsiRegex.FindStringSubmatch(output)
  228. if substrings == nil {
  229. return "", fmt.Errorf("scsi_id output cannot be parsed: %q", output)
  230. }
  231. return substrings[1], nil
  232. }
  233. // Returns list of all /dev/disk/by-id/* paths for given PD.
  234. func getDiskByIDPaths(pdName string, partition string) []string {
  235. devicePaths := []string{
  236. filepath.Join(diskByIDPath, diskGooglePrefix+pdName),
  237. filepath.Join(diskByIDPath, diskScsiGooglePrefix+pdName),
  238. }
  239. if partition != "" {
  240. for i, path := range devicePaths {
  241. devicePaths[i] = path + diskPartitionSuffix + partition
  242. }
  243. }
  244. return devicePaths
  245. }
  246. // Return cloud provider
  247. func getCloudProvider(cloudProvider cloudprovider.Interface) (*gcecloud.Cloud, error) {
  248. var err error
  249. for numRetries := 0; numRetries < maxRetries; numRetries++ {
  250. gceCloudProvider, ok := cloudProvider.(*gcecloud.Cloud)
  251. if !ok || gceCloudProvider == nil {
  252. // Retry on error. See issue #11321
  253. klog.Errorf("Failed to get GCE Cloud Provider. plugin.host.GetCloudProvider returned %v instead", cloudProvider)
  254. time.Sleep(errorSleepDuration)
  255. continue
  256. }
  257. return gceCloudProvider, nil
  258. }
  259. return nil, fmt.Errorf("Failed to get GCE GCECloudProvider with error %v", err)
  260. }
  261. // Triggers the application of udev rules by calling "udevadm trigger
  262. // --action=change" for newly created "/dev/sd*" drives (exist only in
  263. // after set). This is workaround for Issue #7972. Once the underlying
  264. // issue has been resolved, this may be removed.
  265. func udevadmChangeToNewDrives(sdBeforeSet sets.String) error {
  266. sdAfter, err := filepath.Glob(diskSDPattern)
  267. if err != nil {
  268. return fmt.Errorf("error filepath.Glob(\"%s\"): %v\r", diskSDPattern, err)
  269. }
  270. for _, sd := range sdAfter {
  271. if !sdBeforeSet.Has(sd) {
  272. return udevadmChangeToDrive(sd)
  273. }
  274. }
  275. return nil
  276. }
  277. // Calls "udevadm trigger --action=change" on the specified drive.
  278. // drivePath must be the block device path to trigger on, in the format "/dev/sd*", or a symlink to it.
  279. // This is workaround for Issue #7972. Once the underlying issue has been resolved, this may be removed.
  280. func udevadmChangeToDrive(drivePath string) error {
  281. klog.V(5).Infof("udevadmChangeToDrive: drive=%q", drivePath)
  282. // Evaluate symlink, if any
  283. drive, err := filepath.EvalSymlinks(drivePath)
  284. if err != nil {
  285. return fmt.Errorf("udevadmChangeToDrive: filepath.EvalSymlinks(%q) failed with %v", drivePath, err)
  286. }
  287. klog.V(5).Infof("udevadmChangeToDrive: symlink path is %q", drive)
  288. // Check to make sure input is "/dev/sd*"
  289. if !strings.Contains(drive, diskSDPath) {
  290. return fmt.Errorf("udevadmChangeToDrive: expected input in the form \"%s\" but drive is %q", diskSDPattern, drive)
  291. }
  292. // Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/sd..."
  293. _, err = exec.New().Command(
  294. "udevadm",
  295. "trigger",
  296. "--action=change",
  297. fmt.Sprintf("--property-match=DEVNAME=%s", drive)).CombinedOutput()
  298. if err != nil {
  299. return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with %v", drive, err)
  300. }
  301. return nil
  302. }
  303. // Checks whether the given GCE PD volume spec is associated with a regional PD.
  304. func isRegionalPD(spec *volume.Spec) bool {
  305. if spec.PersistentVolume != nil {
  306. zonesLabel := spec.PersistentVolume.Labels[v1.LabelZoneFailureDomain]
  307. zones := strings.Split(zonesLabel, cloudvolume.LabelMultiZoneDelimiter)
  308. return len(zones) > 1
  309. }
  310. return false
  311. }