@@ -466,163 +466,162 @@ func (sched *Scheduler) scheduleOne() {
 		customcache.LabCache.CleanCache()
 		klog.Infof("Cache: %v", customcache.LabCache.Cache)
 	default:
-	}
-
-	fwk := sched.config.Framework
+		fwk := sched.config.Framework
 
-	pod := sched.config.NextPod()
-	// pod could be nil when schedulerQueue is closed
-	if pod == nil {
-		return
-	}
-	if pod.DeletionTimestamp != nil {
-		sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
-		klog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
-		return
-	}
+		pod := sched.config.NextPod()
+		// pod could be nil when schedulerQueue is closed
+		if pod == nil {
+			return
+		}
+		if pod.DeletionTimestamp != nil {
+			sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
+			klog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
+			return
+		}
 
-	klog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
+		klog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
 
-	// Synchronously attempt to find a fit for the pod.
-	start := time.Now()
-	pluginContext := framework.NewPluginContext()
-	scheduleResult, err := sched.schedule(pod)
-	if err != nil {
-		// schedule() may have failed because the pod would not fit on any host, so we try to
-		// preempt, with the expectation that the next time the pod is tried for scheduling it
-		// will fit due to the preemption. It is also possible that a different pod will schedule
-		// into the resources that were preempted, but this is harmless.
-		if fitError, ok := err.(*core.FitError); ok {
-			if !util.PodPriorityEnabled() || sched.config.DisablePreemption {
-				klog.V(3).Infof("Pod priority feature is not enabled or preemption is disabled by scheduler configuration." +
-					" No preemption is performed.")
+		// Synchronously attempt to find a fit for the pod.
+		start := time.Now()
+		pluginContext := framework.NewPluginContext()
+		scheduleResult, err := sched.schedule(pod)
+		if err != nil {
+			// schedule() may have failed because the pod would not fit on any host, so we try to
+			// preempt, with the expectation that the next time the pod is tried for scheduling it
+			// will fit due to the preemption. It is also possible that a different pod will schedule
+			// into the resources that were preempted, but this is harmless.
+			if fitError, ok := err.(*core.FitError); ok {
+				if !util.PodPriorityEnabled() || sched.config.DisablePreemption {
+					klog.V(3).Infof("Pod priority feature is not enabled or preemption is disabled by scheduler configuration." +
+						" No preemption is performed.")
+				} else {
+					preemptionStartTime := time.Now()
+					sched.preempt(pod, fitError)
+					metrics.PreemptionAttempts.Inc()
+					metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
+					metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
+					metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
+					metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
+				}
+				// Pod did not fit anywhere, so it is counted as a failure. If preemption
+				// succeeds, the pod should get counted as a success the next time we try to
+				// schedule it. (hopefully)
+				metrics.PodScheduleFailures.Inc()
 			} else {
-				preemptionStartTime := time.Now()
-				sched.preempt(pod, fitError)
-				metrics.PreemptionAttempts.Inc()
-				metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
-				metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
-				metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
-				metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
+				klog.Errorf("error selecting node for pod: %v", err)
+				metrics.PodScheduleErrors.Inc()
			}
-			// Pod did not fit anywhere, so it is counted as a failure. If preemption
-			// succeeds, the pod should get counted as a success the next time we try to
-			// schedule it. (hopefully)
-			metrics.PodScheduleFailures.Inc()
-		} else {
-			klog.Errorf("error selecting node for pod: %v", err)
+			return
+		}
+		metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInSeconds(start))
+		metrics.DeprecatedSchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
+		// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
+		// This allows us to keep scheduling without waiting on binding to occur.
+		assumedPod := pod.DeepCopy()
+
+		// Assume volumes first before assuming the pod.
+		//
+		// If all volumes are completely bound, then allBound is true and binding will be skipped.
+		//
+		// Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
+		//
+		// This function modifies 'assumedPod' if volume binding is required.
+		allBound, err := sched.assumeVolumes(assumedPod, scheduleResult.SuggestedHost)
+		if err != nil {
+			klog.Errorf("error assuming volumes: %v", err)
 			metrics.PodScheduleErrors.Inc()
+			return
 		}
-		return
-	}
-	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInSeconds(start))
-	metrics.DeprecatedSchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
-	// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
-	// This allows us to keep scheduling without waiting on binding to occur.
-	assumedPod := pod.DeepCopy()
-
-	// Assume volumes first before assuming the pod.
-	//
-	// If all volumes are completely bound, then allBound is true and binding will be skipped.
-	//
-	// Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
-	//
-	// This function modifies 'assumedPod' if volume binding is required.
-	allBound, err := sched.assumeVolumes(assumedPod, scheduleResult.SuggestedHost)
-	if err != nil {
-		klog.Errorf("error assuming volumes: %v", err)
-		metrics.PodScheduleErrors.Inc()
-		return
-	}
 
-	// Run "reserve" plugins.
-	if sts := fwk.RunReservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
-		sched.recordSchedulingFailure(assumedPod, sts.AsError(), SchedulerError, sts.Message())
-		metrics.PodScheduleErrors.Inc()
-		return
-	}
-
-	// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
-	err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
-	if err != nil {
-		klog.Errorf("error assuming pod: %v", err)
-		metrics.PodScheduleErrors.Inc()
-		// trigger un-reserve plugins to clean up state associated with the reserved Pod
-		fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-		return
-	}
-	// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
-	go func() {
-		// Bind volumes first before Pod
-		if !allBound {
-			err := sched.bindVolumes(assumedPod)
-			if err != nil {
-				klog.Errorf("error binding volumes: %v", err)
-				metrics.PodScheduleErrors.Inc()
-				// trigger un-reserve plugins to clean up state associated with the reserved Pod
-				fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-				return
-			}
+		// Run "reserve" plugins.
+		if sts := fwk.RunReservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
+			sched.recordSchedulingFailure(assumedPod, sts.AsError(), SchedulerError, sts.Message())
+			metrics.PodScheduleErrors.Inc()
+			return
		}
 
-		// Run "permit" plugins.
-		permitStatus := fwk.RunPermitPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-		if !permitStatus.IsSuccess() {
-			var reason string
-			if permitStatus.Code() == framework.Unschedulable {
-				reason = v1.PodReasonUnschedulable
-			} else {
-				metrics.PodScheduleErrors.Inc()
-				reason = SchedulerError
-			}
-			if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
-				klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
-			}
-			sched.recordSchedulingFailure(assumedPod, permitStatus.AsError(), reason, permitStatus.Message())
+		// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
+		err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
+		if err != nil {
+			klog.Errorf("error assuming pod: %v", err)
+			metrics.PodScheduleErrors.Inc()
 			// trigger un-reserve plugins to clean up state associated with the reserved Pod
 			fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
 			return
 		}
+		// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
+		go func() {
+			// Bind volumes first before Pod
+			if !allBound {
+				err := sched.bindVolumes(assumedPod)
+				if err != nil {
+					klog.Errorf("error binding volumes: %v", err)
+					metrics.PodScheduleErrors.Inc()
+					// trigger un-reserve plugins to clean up state associated with the reserved Pod
+					fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+					return
+				}
+			}
 
-		// Run "prebind" plugins.
-		prebindStatus := fwk.RunPrebindPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-		if !prebindStatus.IsSuccess() {
-			var reason string
-			if prebindStatus.Code() == framework.Unschedulable {
-				reason = v1.PodReasonUnschedulable
-			} else {
-				metrics.PodScheduleErrors.Inc()
-				reason = SchedulerError
+			// Run "permit" plugins.
+			permitStatus := fwk.RunPermitPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+			if !permitStatus.IsSuccess() {
+				var reason string
+				if permitStatus.Code() == framework.Unschedulable {
+					reason = v1.PodReasonUnschedulable
+				} else {
+					metrics.PodScheduleErrors.Inc()
+					reason = SchedulerError
+				}
+				if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
+					klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
+				}
+				sched.recordSchedulingFailure(assumedPod, permitStatus.AsError(), reason, permitStatus.Message())
+				// trigger un-reserve plugins to clean up state associated with the reserved Pod
+				fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+				return
			}
-			if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
-				klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
+
+			// Run "prebind" plugins.
+			prebindStatus := fwk.RunPrebindPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+			if !prebindStatus.IsSuccess() {
+				var reason string
+				if prebindStatus.Code() == framework.Unschedulable {
+					reason = v1.PodReasonUnschedulable
+				} else {
+					metrics.PodScheduleErrors.Inc()
+					reason = SchedulerError
+				}
+				if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
+					klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
+				}
+				sched.recordSchedulingFailure(assumedPod, prebindStatus.AsError(), reason, prebindStatus.Message())
+				// trigger un-reserve plugins to clean up state associated with the reserved Pod
+				fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+				return
			}
-			sched.recordSchedulingFailure(assumedPod, prebindStatus.AsError(), reason, prebindStatus.Message())
-			// trigger un-reserve plugins to clean up state associated with the reserved Pod
-			fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-			return
-		}
 
-		err := sched.bind(assumedPod, &v1.Binding{
-			ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
-			Target: v1.ObjectReference{
-				Kind: "Node",
-				Name: scheduleResult.SuggestedHost,
-			},
-		})
-		metrics.E2eSchedulingLatency.Observe(metrics.SinceInSeconds(start))
-		metrics.DeprecatedE2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
-		if err != nil {
-			klog.Errorf("error binding pod: %v", err)
-			metrics.PodScheduleErrors.Inc()
-			// trigger un-reserve plugins to clean up state associated with the reserved Pod
-			fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-		} else {
-			klog.V(2).Infof("pod %v/%v is bound successfully on node %v, %d nodes evaluated, %d nodes were found feasible", assumedPod.Namespace, assumedPod.Name, scheduleResult.SuggestedHost, scheduleResult.EvaluatedNodes, scheduleResult.FeasibleNodes)
-			metrics.PodScheduleSuccesses.Inc()
+			err := sched.bind(assumedPod, &v1.Binding{
+				ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
+				Target: v1.ObjectReference{
+					Kind: "Node",
+					Name: scheduleResult.SuggestedHost,
+				},
+			})
+			metrics.E2eSchedulingLatency.Observe(metrics.SinceInSeconds(start))
+			metrics.DeprecatedE2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
+			if err != nil {
+				klog.Errorf("error binding pod: %v", err)
+				metrics.PodScheduleErrors.Inc()
+				// trigger un-reserve plugins to clean up state associated with the reserved Pod
+				fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+			} else {
+				klog.V(2).Infof("pod %v/%v is bound successfully on node %v, %d nodes evaluated, %d nodes were found feasible", assumedPod.Namespace, assumedPod.Name, scheduleResult.SuggestedHost, scheduleResult.EvaluatedNodes, scheduleResult.FeasibleNodes)
+				metrics.PodScheduleSuccesses.Inc()
 
-			// Run "postbind" plugins.
-			fwk.RunPostbindPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
-		}
-	}()
+				// Run "postbind" plugins.
+				fwk.RunPostbindPlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost)
+			}
+		}()
+	}
 }
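The hunk above is mostly a re-indentation: apart from dropping the early closing brace of the select (and the blank line after it) and adding one closing brace at the very end, every removed line is re-added unchanged one level deeper. The effect is that the whole scheduling attempt, from fwk := sched.config.Framework down to the }() that closes the binding goroutine, now lives inside the default: arm of the select guarding customcache.LabCache, so a scheduling cycle is skipped whenever the cache-cleanup case fires instead. The channel driving that case is not visible in this hunk; the sketch below is only an illustration of the non-blocking select pattern under that assumption, with a time.Ticker and hypothetical names (cache, cleanCache) standing in for the real customcache.LabCache API.

package main

import (
	"fmt"
	"sync"
	"time"
)

// cache is a hypothetical stand-in for customcache.LabCache: a mutex-guarded
// map plus a ticker that marks when the cached state should be thrown away.
type cache struct {
	mu      sync.Mutex
	entries map[string]string
	timeout *time.Ticker
}

// cleanCache empties the map, mirroring what LabCache.CleanCache() is used
// for in the hunk above.
func (c *cache) cleanCache() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries = make(map[string]string)
}

// scheduleOne shows the control flow the diff introduces: if the cleanup
// trigger has fired, flush the cache and skip this cycle; otherwise fall
// through to the default arm, where the whole scheduling attempt runs.
func (c *cache) scheduleOne() {
	select {
	case <-c.timeout.C: // assumed trigger; the real channel is not shown in the hunk
		c.cleanCache()
		fmt.Println("cache flushed, skipping this scheduling cycle")
	default:
		// In the patched scheduler this branch holds everything from
		// fwk := sched.config.Framework down to the binding goroutine,
		// which is why that body gains one indentation level.
		fmt.Println("scheduling as usual, cache:", c.entries)
	}
}

func main() {
	c := &cache{
		entries: map[string]string{"node-a": "scored"},
		timeout: time.NewTicker(30 * time.Millisecond),
	}
	defer c.timeout.Stop()
	for i := 0; i < 5; i++ {
		c.scheduleOne()
		time.Sleep(15 * time.Millisecond)
	}
}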