/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package noderesources

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// ResourceLimits is a score plugin that increases the score of a node by 1 if
// the node satisfies the input pod's resource limits.
type ResourceLimits struct {
	handle framework.FrameworkHandle
}

// Compile-time checks that ResourceLimits implements the interfaces it claims.
var _ framework.PreScorePlugin = &ResourceLimits{}
var _ framework.ScorePlugin = &ResourceLimits{}

const (
	// ResourceLimitsName is the name of the plugin used in the plugin registry and configurations.
	ResourceLimitsName = "NodeResourceLimits"

	// preScoreStateKey is the key in CycleState under which NodeResourceLimits
	// stores its pre-computed data. Prefixing the key with the plugin name
	// helps avoid collisions with keys written by other plugins.
	preScoreStateKey = "PreScore" + ResourceLimitsName
)

// preScoreState is computed at PreScore and consumed at Score.
type preScoreState struct {
	// podResourceLimits holds the pod's aggregated resource limits.
	podResourceLimits *schedulernodeinfo.Resource
}

// Clone implements framework.StateData. The state is written once in PreScore
// and only read afterwards, so returning the receiver is a safe, cheap clone.
func (s *preScoreState) Clone() framework.StateData {
	return s
}

// Name returns the name of the plugin. It is used in logs, etc.
func (rl *ResourceLimits) Name() string {
	return ResourceLimitsName
}

// PreScore builds and writes the cycle state consumed later by Score.
func (rl *ResourceLimits) PreScore(
	pCtx context.Context,
	cycleState *framework.CycleState,
	pod *v1.Pod,
	nodes []*v1.Node,
) *framework.Status {
	if len(nodes) == 0 {
		// No nodes to score.
		return nil
	}

	if rl.handle.SnapshotSharedLister() == nil {
		return framework.NewStatus(framework.Error, "empty shared lister")
	}

	s := &preScoreState{
		podResourceLimits: getResourceLimits(pod),
	}
	cycleState.Write(preScoreStateKey, s)
	return nil
}

// getPodResourceLimits reads the pod resource limits pre-computed by PreScore
// back out of the cycle state.
func getPodResourceLimits(cycleState *framework.CycleState) (*schedulernodeinfo.Resource, error) {
	c, err := cycleState.Read(preScoreStateKey)
	if err != nil {
		return nil, fmt.Errorf("error reading %q from cycleState: %v", preScoreStateKey, err)
	}

	s, ok := c.(*preScoreState)
	if !ok {
		return nil, fmt.Errorf("%+v cannot be converted to ResourceLimits.preScoreState", c)
	}
	return s.podResourceLimits, nil
}

// Score is invoked at the Score extension point. It currently works as follows:
// If the node does not publish its allocatable resources (neither cpu nor memory), the node score is not affected.
// If the pod specifies neither a cpu nor a memory limit, the node score is not affected.
// If the node can satisfy the pod's cpu limit, its memory limit, or both, the node is assigned a score of 1.
// The lowest positive score of 1 is chosen deliberately: it mainly serves to break ties between nodes that
// received the same score from the least-requested and most-requested priority functions.
// See the worked example after this function.
func (rl *ResourceLimits) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
	nodeInfo, err := rl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
	if err != nil || nodeInfo.Node() == nil {
		return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v, node is nil: %v", nodeName, err, nodeInfo.Node() == nil))
	}

	allocatableResources := nodeInfo.AllocatableResource()
	podLimits, err := getPodResourceLimits(state)
	if err != nil {
		return 0, framework.NewStatus(framework.Error, err.Error())
	}

	cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
	memScore := computeScore(podLimits.Memory, allocatableResources.Memory)

	score := int64(0)
	if cpuScore == 1 || memScore == 1 {
		score = 1
	}
	return score, nil
}

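// A worked example of the scoring rule above (the numbers are illustrative,
// not taken from this file): suppose a node reports 4000 millicores of
// allocatable cpu and 8Gi (8589934592 bytes) of allocatable memory, and the
// pod's limits sum to 500m cpu with no memory limit:
//
//	cpuScore := computeScore(500, 4000)     // 1: 500m fits within 4000m
//	memScore := computeScore(0, 8589934592) // 0: the pod sets no memory limit
//
// The final score is 1 because at least one of the two sub-scores is 1.
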
// ScoreExtensions of the Score plugin. It returns nil because this plugin
// does not normalize scores.
func (rl *ResourceLimits) ScoreExtensions() framework.ScoreExtensions {
	return nil
}

// NewResourceLimits initializes a new plugin and returns it.
func NewResourceLimits(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
	return &ResourceLimits{handle: h}, nil
}

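// A minimal sketch of how this factory could be wired into the scheduler's
// plugin registry (the registry value is an assumption for illustration, not
// part of this file):
//
//	registry := framework.Registry{
//		ResourceLimitsName: NewResourceLimits,
//	}
//
// The framework later invokes the factory with the plugin's raw configuration
// and a FrameworkHandle when it instantiates the configured plugins.
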
// getResourceLimits computes the aggregated resource limits of the input pod.
// The reason to create this new function is to be consistent with other
// priority functions, because most (or perhaps all) of them work with
// schedulernodeinfo.Resource.
func getResourceLimits(pod *v1.Pod) *schedulernodeinfo.Resource {
	result := &schedulernodeinfo.Resource{}
	// Sum the limits of all regular containers.
	for _, container := range pod.Spec.Containers {
		result.Add(container.Resources.Limits)
	}

	// Take max_resource(sum_pod, any_init_container): init containers run
	// sequentially, one at a time, so for each resource the pod's effective
	// limit is the maximum of the regular containers' sum and any single init
	// container's limit.
	for _, container := range pod.Spec.InitContainers {
		result.SetMaxResource(container.Resources.Limits)
	}

	return result
}

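// A worked example of the max rule above (illustrative numbers): two regular
// containers limited to 300m and 400m cpu sum to 700m; an init container
// limited to 1000m cpu raises the result to max(700, 1000) = 1000m, because
// the init container runs alone before any regular container starts.
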
// computeScore returns 1 if the limit is non-zero, the allocatable value is
// non-zero, and the limit is less than or equal to the allocatable value;
// otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

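// Illustrative cases (not from this file):
//
//	computeScore(500, 1000)  // 1: the limit fits within the allocatable value
//	computeScore(1500, 1000) // 0: the limit exceeds the allocatable value
//	computeScore(0, 1000)    // 0: the pod sets no limit for this resource
//	computeScore(500, 0)     // 0: the node did not publish this resource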