custom_metrics_stackdriver_autoscaling.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package autoscaling
  14. import (
  15. "context"
  16. "math"
  17. "time"
  18. gcm "google.golang.org/api/monitoring/v3"
  19. appsv1 "k8s.io/api/apps/v1"
  20. as "k8s.io/api/autoscaling/v2beta1"
  21. corev1 "k8s.io/api/core/v1"
  22. "k8s.io/apimachinery/pkg/api/resource"
  23. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  24. "k8s.io/apimachinery/pkg/util/wait"
  25. clientset "k8s.io/client-go/kubernetes"
  26. "k8s.io/kubernetes/test/e2e/framework"
  27. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  28. "k8s.io/kubernetes/test/e2e/instrumentation/monitoring"
  29. "github.com/onsi/ginkgo"
  30. "golang.org/x/oauth2/google"
  31. )
  32. const (
  33. stackdriverExporterDeployment = "stackdriver-exporter-deployment"
  34. dummyDeploymentName = "dummy-deployment"
  35. stackdriverExporterPod = "stackdriver-exporter-pod"
  36. externalMetricValue = int64(85)
  37. )
  38. var _ = SIGDescribe("[HPA] Horizontal pod autoscaling (scale resource: Custom Metrics from Stackdriver)", func() {
  39. ginkgo.BeforeEach(func() {
  40. framework.SkipUnlessProviderIs("gce", "gke")
  41. })
  42. f := framework.NewDefaultFramework("horizontal-pod-autoscaling")
  43. ginkgo.It("should scale down with Custom Metric of type Pod from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  44. initialReplicas := 2
  45. // metric should cause scale down
  46. metricValue := int64(100)
  47. metricTarget := 2 * metricValue
  48. tc := CustomMetricTestCase{
  49. framework: f,
  50. kubeClient: f.ClientSet,
  51. initialReplicas: initialReplicas,
  52. scaledReplicas: 1,
  53. deployment: monitoring.SimpleStackdriverExporterDeployment(stackdriverExporterDeployment, f.Namespace.ObjectMeta.Name, int32(initialReplicas), metricValue),
  54. hpa: simplePodsHPA(f.Namespace.ObjectMeta.Name, metricTarget)}
  55. tc.Run()
  56. })
  57. ginkgo.It("should scale down with Custom Metric of type Object from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  58. initialReplicas := 2
  59. // metric should cause scale down
  60. metricValue := int64(100)
  61. metricTarget := 2 * metricValue
  62. tc := CustomMetricTestCase{
  63. framework: f,
  64. kubeClient: f.ClientSet,
  65. initialReplicas: initialReplicas,
  66. scaledReplicas: 1,
  67. // Metric exported by deployment is ignored
  68. deployment: monitoring.SimpleStackdriverExporterDeployment(dummyDeploymentName, f.Namespace.ObjectMeta.Name, int32(initialReplicas), 0 /* ignored */),
  69. pod: monitoring.StackdriverExporterPod(stackdriverExporterPod, f.Namespace.Name, stackdriverExporterPod, monitoring.CustomMetricName, metricValue),
  70. hpa: objectHPA(f.Namespace.ObjectMeta.Name, metricTarget)}
  71. tc.Run()
  72. })
  73. ginkgo.It("should scale down with External Metric with target value from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  74. initialReplicas := 2
  75. // metric should cause scale down
  76. metricValue := externalMetricValue
  77. metricTarget := 3 * metricValue
  78. metricTargets := map[string]externalMetricTarget{
  79. "target": {
  80. value: metricTarget,
  81. isAverage: false,
  82. },
  83. }
  84. tc := CustomMetricTestCase{
  85. framework: f,
  86. kubeClient: f.ClientSet,
  87. initialReplicas: initialReplicas,
  88. scaledReplicas: 1,
  89. // Metric exported by deployment is ignored
  90. deployment: monitoring.SimpleStackdriverExporterDeployment(dummyDeploymentName, f.Namespace.ObjectMeta.Name, int32(initialReplicas), 0 /* ignored */),
  91. pod: monitoring.StackdriverExporterPod(stackdriverExporterPod, f.Namespace.Name, stackdriverExporterPod, "target", metricValue),
  92. hpa: externalHPA(f.Namespace.ObjectMeta.Name, metricTargets)}
  93. tc.Run()
  94. })
  95. ginkgo.It("should scale down with External Metric with target average value from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  96. initialReplicas := 2
  97. // metric should cause scale down
  98. metricValue := externalMetricValue
  99. metricAverageTarget := 3 * metricValue
  100. metricTargets := map[string]externalMetricTarget{
  101. "target_average": {
  102. value: metricAverageTarget,
  103. isAverage: true,
  104. },
  105. }
  106. tc := CustomMetricTestCase{
  107. framework: f,
  108. kubeClient: f.ClientSet,
  109. initialReplicas: initialReplicas,
  110. scaledReplicas: 1,
  111. // Metric exported by deployment is ignored
  112. deployment: monitoring.SimpleStackdriverExporterDeployment(dummyDeploymentName, f.Namespace.ObjectMeta.Name, int32(initialReplicas), 0 /* ignored */),
  113. pod: monitoring.StackdriverExporterPod(stackdriverExporterPod, f.Namespace.Name, stackdriverExporterPod, "target_average", externalMetricValue),
  114. hpa: externalHPA(f.Namespace.ObjectMeta.Name, metricTargets)}
  115. tc.Run()
  116. })
  117. ginkgo.It("should scale down with Custom Metric of type Pod from Stackdriver with Prometheus [Feature:CustomMetricsAutoscaling]", func() {
  118. initialReplicas := 2
  119. // metric should cause scale down
  120. metricValue := int64(100)
  121. metricTarget := 2 * metricValue
  122. tc := CustomMetricTestCase{
  123. framework: f,
  124. kubeClient: f.ClientSet,
  125. initialReplicas: initialReplicas,
  126. scaledReplicas: 1,
  127. deployment: monitoring.PrometheusExporterDeployment(stackdriverExporterDeployment, f.Namespace.ObjectMeta.Name, int32(initialReplicas), metricValue),
  128. hpa: simplePodsHPA(f.Namespace.ObjectMeta.Name, metricTarget)}
  129. tc.Run()
  130. })
  131. ginkgo.It("should scale up with two metrics of type Pod from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  132. initialReplicas := 1
  133. // metric 1 would cause a scale down, if not for metric 2
  134. metric1Value := int64(100)
  135. metric1Target := 2 * metric1Value
  136. // metric2 should cause a scale up
  137. metric2Value := int64(200)
  138. metric2Target := int64(0.5 * float64(metric2Value))
  139. containers := []monitoring.CustomMetricContainerSpec{
  140. {
  141. Name: "stackdriver-exporter-metric1",
  142. MetricName: "metric1",
  143. MetricValue: metric1Value,
  144. },
  145. {
  146. Name: "stackdriver-exporter-metric2",
  147. MetricName: "metric2",
  148. MetricValue: metric2Value,
  149. },
  150. }
  151. metricTargets := map[string]int64{"metric1": metric1Target, "metric2": metric2Target}
  152. tc := CustomMetricTestCase{
  153. framework: f,
  154. kubeClient: f.ClientSet,
  155. initialReplicas: initialReplicas,
  156. scaledReplicas: 3,
  157. deployment: monitoring.StackdriverExporterDeployment(stackdriverExporterDeployment, f.Namespace.ObjectMeta.Name, int32(initialReplicas), containers),
  158. hpa: podsHPA(f.Namespace.ObjectMeta.Name, stackdriverExporterDeployment, metricTargets)}
  159. tc.Run()
  160. })
  161. ginkgo.It("should scale up with two External metrics from Stackdriver [Feature:CustomMetricsAutoscaling]", func() {
  162. initialReplicas := 1
  163. // metric 1 would cause a scale down, if not for metric 2
  164. metric1Value := externalMetricValue
  165. metric1Target := 2 * metric1Value
  166. // metric2 should cause a scale up
  167. metric2Value := externalMetricValue
  168. metric2Target := int64(math.Ceil(0.5 * float64(metric2Value)))
  169. metricTargets := map[string]externalMetricTarget{
  170. "external_metric_1": {
  171. value: metric1Target,
  172. isAverage: false,
  173. },
  174. "external_metric_2": {
  175. value: metric2Target,
  176. isAverage: false,
  177. },
  178. }
  179. containers := []monitoring.CustomMetricContainerSpec{
  180. {
  181. Name: "stackdriver-exporter-metric1",
  182. MetricName: "external_metric_1",
  183. MetricValue: metric1Value,
  184. },
  185. {
  186. Name: "stackdriver-exporter-metric2",
  187. MetricName: "external_metric_2",
  188. MetricValue: metric2Value,
  189. },
  190. }
  191. tc := CustomMetricTestCase{
  192. framework: f,
  193. kubeClient: f.ClientSet,
  194. initialReplicas: initialReplicas,
  195. scaledReplicas: 3,
  196. deployment: monitoring.StackdriverExporterDeployment(dummyDeploymentName, f.Namespace.ObjectMeta.Name, int32(initialReplicas), containers),
  197. hpa: externalHPA(f.Namespace.ObjectMeta.Name, metricTargets)}
  198. tc.Run()
  199. })
  200. })
  201. // CustomMetricTestCase is a struct for test cases.
  202. type CustomMetricTestCase struct {
  203. framework *framework.Framework
  204. hpa *as.HorizontalPodAutoscaler
  205. kubeClient clientset.Interface
  206. deployment *appsv1.Deployment
  207. pod *corev1.Pod
  208. initialReplicas int
  209. scaledReplicas int
  210. }
  211. // Run starts test case.
  212. func (tc *CustomMetricTestCase) Run() {
  213. projectID := framework.TestContext.CloudConfig.ProjectID
  214. ctx := context.Background()
  215. client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
  216. // Hack for running tests locally, needed to authenticate in Stackdriver
  217. // If this is your use case, create application default credentials:
  218. // $ gcloud auth application-default login
  219. // and uncomment following lines:
  220. /*
  221. ts, err := google.DefaultTokenSource(oauth2.NoContext)
  222. e2elog.Logf("Couldn't get application default credentials, %v", err)
  223. if err != nil {
  224. framework.Failf("Error accessing application default credentials, %v", err)
  225. }
  226. client := oauth2.NewClient(oauth2.NoContext, ts)
  227. */
  228. gcmService, err := gcm.New(client)
  229. if err != nil {
  230. framework.Failf("Failed to create gcm service, %v", err)
  231. }
  232. // Set up a cluster: create a custom metric and set up k8s-sd adapter
  233. err = monitoring.CreateDescriptors(gcmService, projectID)
  234. if err != nil {
  235. framework.Failf("Failed to create metric descriptor: %v", err)
  236. }
  237. defer monitoring.CleanupDescriptors(gcmService, projectID)
  238. err = monitoring.CreateAdapter(monitoring.AdapterDefault)
  239. if err != nil {
  240. framework.Failf("Failed to set up: %v", err)
  241. }
  242. defer monitoring.CleanupAdapter(monitoring.AdapterDefault)
  243. // Run application that exports the metric
  244. err = createDeploymentToScale(tc.framework, tc.kubeClient, tc.deployment, tc.pod)
  245. if err != nil {
  246. framework.Failf("Failed to create stackdriver-exporter pod: %v", err)
  247. }
  248. defer cleanupDeploymentsToScale(tc.framework, tc.kubeClient, tc.deployment, tc.pod)
  249. // Wait for the deployment to run
  250. waitForReplicas(tc.deployment.ObjectMeta.Name, tc.framework.Namespace.ObjectMeta.Name, tc.kubeClient, 15*time.Minute, tc.initialReplicas)
  251. // Autoscale the deployment
  252. _, err = tc.kubeClient.AutoscalingV2beta1().HorizontalPodAutoscalers(tc.framework.Namespace.ObjectMeta.Name).Create(tc.hpa)
  253. if err != nil {
  254. framework.Failf("Failed to create HPA: %v", err)
  255. }
  256. defer tc.kubeClient.AutoscalingV2beta1().HorizontalPodAutoscalers(tc.framework.Namespace.ObjectMeta.Name).Delete(tc.hpa.ObjectMeta.Name, &metav1.DeleteOptions{})
  257. waitForReplicas(tc.deployment.ObjectMeta.Name, tc.framework.Namespace.ObjectMeta.Name, tc.kubeClient, 15*time.Minute, tc.scaledReplicas)
  258. }
  259. func createDeploymentToScale(f *framework.Framework, cs clientset.Interface, deployment *appsv1.Deployment, pod *corev1.Pod) error {
  260. if deployment != nil {
  261. _, err := cs.AppsV1().Deployments(f.Namespace.ObjectMeta.Name).Create(deployment)
  262. if err != nil {
  263. return err
  264. }
  265. }
  266. if pod != nil {
  267. _, err := cs.CoreV1().Pods(f.Namespace.ObjectMeta.Name).Create(pod)
  268. if err != nil {
  269. return err
  270. }
  271. }
  272. return nil
  273. }
  274. func cleanupDeploymentsToScale(f *framework.Framework, cs clientset.Interface, deployment *appsv1.Deployment, pod *corev1.Pod) {
  275. if deployment != nil {
  276. _ = cs.AppsV1().Deployments(f.Namespace.ObjectMeta.Name).Delete(deployment.ObjectMeta.Name, &metav1.DeleteOptions{})
  277. }
  278. if pod != nil {
  279. _ = cs.CoreV1().Pods(f.Namespace.ObjectMeta.Name).Delete(pod.ObjectMeta.Name, &metav1.DeleteOptions{})
  280. }
  281. }
  282. func simplePodsHPA(namespace string, metricTarget int64) *as.HorizontalPodAutoscaler {
  283. return podsHPA(namespace, stackdriverExporterDeployment, map[string]int64{monitoring.CustomMetricName: metricTarget})
  284. }
  285. func podsHPA(namespace string, deploymentName string, metricTargets map[string]int64) *as.HorizontalPodAutoscaler {
  286. var minReplicas int32 = 1
  287. metrics := []as.MetricSpec{}
  288. for metric, target := range metricTargets {
  289. metrics = append(metrics, as.MetricSpec{
  290. Type: as.PodsMetricSourceType,
  291. Pods: &as.PodsMetricSource{
  292. MetricName: metric,
  293. TargetAverageValue: *resource.NewQuantity(target, resource.DecimalSI),
  294. },
  295. })
  296. }
  297. return &as.HorizontalPodAutoscaler{
  298. ObjectMeta: metav1.ObjectMeta{
  299. Name: "custom-metrics-pods-hpa",
  300. Namespace: namespace,
  301. },
  302. Spec: as.HorizontalPodAutoscalerSpec{
  303. Metrics: metrics,
  304. MaxReplicas: 3,
  305. MinReplicas: &minReplicas,
  306. ScaleTargetRef: as.CrossVersionObjectReference{
  307. APIVersion: "apps/v1",
  308. Kind: "Deployment",
  309. Name: deploymentName,
  310. },
  311. },
  312. }
  313. }
  314. func objectHPA(namespace string, metricTarget int64) *as.HorizontalPodAutoscaler {
  315. var minReplicas int32 = 1
  316. return &as.HorizontalPodAutoscaler{
  317. ObjectMeta: metav1.ObjectMeta{
  318. Name: "custom-metrics-objects-hpa",
  319. Namespace: namespace,
  320. },
  321. Spec: as.HorizontalPodAutoscalerSpec{
  322. Metrics: []as.MetricSpec{
  323. {
  324. Type: as.ObjectMetricSourceType,
  325. Object: &as.ObjectMetricSource{
  326. MetricName: monitoring.CustomMetricName,
  327. Target: as.CrossVersionObjectReference{
  328. Kind: "Pod",
  329. Name: stackdriverExporterPod,
  330. },
  331. TargetValue: *resource.NewQuantity(metricTarget, resource.DecimalSI),
  332. },
  333. },
  334. },
  335. MaxReplicas: 3,
  336. MinReplicas: &minReplicas,
  337. ScaleTargetRef: as.CrossVersionObjectReference{
  338. APIVersion: "apps/v1",
  339. Kind: "Deployment",
  340. Name: dummyDeploymentName,
  341. },
  342. },
  343. }
  344. }
  345. type externalMetricTarget struct {
  346. value int64
  347. isAverage bool
  348. }
  349. func externalHPA(namespace string, metricTargets map[string]externalMetricTarget) *as.HorizontalPodAutoscaler {
  350. var minReplicas int32 = 1
  351. metricSpecs := []as.MetricSpec{}
  352. selector := &metav1.LabelSelector{
  353. MatchLabels: map[string]string{"resource.type": "gke_container"},
  354. MatchExpressions: []metav1.LabelSelectorRequirement{
  355. {
  356. Key: "resource.labels.namespace_id",
  357. Operator: metav1.LabelSelectorOpIn,
  358. // TODO(bskiba): change default to real namespace name once it is available
  359. // from Stackdriver.
  360. Values: []string{"default", "dummy"},
  361. },
  362. {
  363. Key: "resource.labels.pod_id",
  364. Operator: metav1.LabelSelectorOpExists,
  365. Values: []string{},
  366. },
  367. },
  368. }
  369. for metric, target := range metricTargets {
  370. var metricSpec as.MetricSpec
  371. metricSpec = as.MetricSpec{
  372. Type: as.ExternalMetricSourceType,
  373. External: &as.ExternalMetricSource{
  374. MetricName: "custom.googleapis.com|" + metric,
  375. MetricSelector: selector,
  376. },
  377. }
  378. if target.isAverage {
  379. metricSpec.External.TargetAverageValue = resource.NewQuantity(target.value, resource.DecimalSI)
  380. } else {
  381. metricSpec.External.TargetValue = resource.NewQuantity(target.value, resource.DecimalSI)
  382. }
  383. metricSpecs = append(metricSpecs, metricSpec)
  384. }
  385. hpa := &as.HorizontalPodAutoscaler{
  386. ObjectMeta: metav1.ObjectMeta{
  387. Name: "custom-metrics-external-hpa",
  388. Namespace: namespace,
  389. },
  390. Spec: as.HorizontalPodAutoscalerSpec{
  391. Metrics: metricSpecs,
  392. MaxReplicas: 3,
  393. MinReplicas: &minReplicas,
  394. ScaleTargetRef: as.CrossVersionObjectReference{
  395. APIVersion: "apps/v1",
  396. Kind: "Deployment",
  397. Name: dummyDeploymentName,
  398. },
  399. },
  400. }
  401. return hpa
  402. }
  403. func waitForReplicas(deploymentName, namespace string, cs clientset.Interface, timeout time.Duration, desiredReplicas int) {
  404. interval := 20 * time.Second
  405. err := wait.PollImmediate(interval, timeout, func() (bool, error) {
  406. deployment, err := cs.AppsV1().Deployments(namespace).Get(deploymentName, metav1.GetOptions{})
  407. if err != nil {
  408. framework.Failf("Failed to get replication controller %s: %v", deployment, err)
  409. }
  410. replicas := int(deployment.Status.ReadyReplicas)
  411. e2elog.Logf("waiting for %d replicas (current: %d)", desiredReplicas, replicas)
  412. return replicas == desiredReplicas, nil // Expected number of replicas found. Exit.
  413. })
  414. if err != nil {
  415. framework.Failf("Timeout waiting %v for %v replicas", timeout, desiredReplicas)
  416. }
  417. }