Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions pkg/estimator/server/framework/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,18 @@ type Framework interface {
Handle
// RunEstimateReplicasPlugins runs the set of configured EstimateReplicasPlugins
// for estimating replicas based on the given replicaRequirements.
// It returns an integer and an Result.
// It returns an integer and a Result.
// The integer represents the minimum calculated value of estimated replicas from each EstimateReplicasPlugin.
// The Result contains code, reasons and error
// it is merged from all plugins returned result codes
// The Result contains code, reasons and error.
// It is merged from all plugins' returned result codes.
RunEstimateReplicasPlugins(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *Result)
// RunEstimateComponentsPlugins runs the set of configured EstimateComponentsPlugins
// for estimating the maximum number of complete component sets based on the given components.
// It returns an integer and a Result.
// The integer represents the minimum calculated value of estimated component sets from each EstimateComponentsPlugin.
// The Result contains code, reasons and error.
// It is merged from all plugins' returned result codes.
RunEstimateComponentsPlugins(ctx context.Context, snapshot *schedcache.Snapshot, components []pb.Component) (int32, *Result)
// TODO(wengyao04): we can add filter and score plugin extension points if needed in the future
}

Expand All @@ -47,21 +54,34 @@ type Plugin interface {
}

// EstimateReplicasPlugin is an interface for replica estimation plugins.
// These estimators are used to estimate the replicas for a given pb.ReplicaRequirements
// These estimators are used to estimate the replicas for a given pb.ReplicaRequirements.
type EstimateReplicasPlugin interface {
	Plugin
	// Estimate is called for each MaxAvailableReplicas request.
	// It returns an integer and a Result.
	// The integer represents the number of calculated replicas for the given replicaRequirements.
	// The Result contains code, reasons and error.
	// The framework's RunEstimateReplicasPlugins merges the Results returned by all plugins into one.
	Estimate(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *Result)
}

// EstimateComponentsPlugin is an interface for component set estimation plugins.
// These estimators are used to estimate the maximum number of complete component sets
// for a given set of components with different replica requirements.
type EstimateComponentsPlugin interface {
	Plugin
	// EstimateComponents is called for each MaxAvailableComponentSets request.
	// It returns an integer and a Result.
	// The integer represents the estimated number of complete component sets that can be scheduled.
	// The Result contains code, reasons and error.
	// The framework's RunEstimateComponentsPlugins merges the Results returned by all plugins into one.
	EstimateComponents(ctx context.Context, snapshot *schedcache.Snapshot, components []pb.Component) (int32, *Result)
}

// Handle provides data and some tools that plugins can use. It is
// passed to the plugin factories at the time of plugin initialization. Plugins
// must store and use this handle to call framework functions.
// We follow the design pattern as kubernetes scheduler framework
// We follow the design pattern of the Kubernetes scheduler framework.
type Handle interface {
ClientSet() clientset.Interface
SharedInformerFactory() informers.SharedInformerFactory
Expand All @@ -85,9 +105,9 @@ const (
// NOTE: A nil status is also considered as "Success".
Success Code = iota
// Unschedulable is used when a plugin finds the resource unschedulable.
// The accompanying status message should explain why the it is unschedulable.
// The accompanying status message should explain why it is unschedulable.
Unschedulable
// Nooperation is used when a plugin is disabled or the plugin list are empty
// Nooperation is used when a plugin is disabled or the plugin list is empty.
Noopperation
// Error is used for internal plugin errors, unexpected input, etc.
Error
Expand Down Expand Up @@ -115,8 +135,8 @@ func NewResult(code Code, reasons ...string) *Result {
// PluginToResult maps a plugin name to the Result returned by that plugin.
type PluginToResult map[string]*Result

// Merge merges the statuses in the map into one. The resulting status code have the following
// precedence: Error, Unschedulable, Disabled.
// Merge merges the statuses in the map into one. The resulting status code has the following
// precedence: Error, Unschedulable, Nooperation.
func (p PluginToResult) Merge() *Result {
if len(p) == 0 {
return NewResult(Noopperation, "plugin results are empty")
Expand Down Expand Up @@ -161,8 +181,8 @@ func (s *Result) IsUnschedulable() bool {
return s != nil && s.code == Unschedulable
}

// IsNoOperation reports whether the Result is non-nil and its Code is "Nooperation".
// A nil Result always yields false.
// TODO (wengyao04): we can remove it once we include node resource estimation as the default plugin in the future.
func (s *Result) IsNoOperation() bool {
	if s == nil {
		return false
	}
	return s.code == Noopperation
}
Expand Down
56 changes: 52 additions & 4 deletions pkg/estimator/server/framework/runtime/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,20 @@ import (
)

const (
	// estimator is the legacy label value for replica estimation metrics.
	// It is still emitted alongside estimateReplicasExtension for backward compatibility.
	//
	// Deprecated: Use estimateReplicasExtension instead. This will be removed in a future release.
	estimator = "Estimator"
	// estimateReplicasExtension is the metrics label value for the replica estimation extension point.
	estimateReplicasExtension = "estimate_replicas"
	// estimateComponentsExtension is the metrics label value for the component set estimation extension point.
	estimateComponentsExtension = "estimate_components"
)

// frameworkImpl implements the Framework interface and is responsible for initializing and running
// the estimator plugins.
type frameworkImpl struct {
	// estimateReplicasPlugins lists the registered plugins that implement EstimateReplicasPlugin.
	estimateReplicasPlugins []framework.EstimateReplicasPlugin
	// estimateComponentsPlugins lists the registered plugins that implement EstimateComponentsPlugin;
	// RunEstimateComponentsPlugins iterates over it.
	estimateComponentsPlugins []framework.EstimateComponentsPlugin
	clientSet                 clientset.Interface
	informerFactory           informers.SharedInformerFactory
}

// Compile-time check that *frameworkImpl satisfies framework.Framework.
var _ framework.Framework = (*frameworkImpl)(nil)
Expand Down Expand Up @@ -85,12 +90,16 @@ func NewFramework(r Registry, opts ...Option) (framework.Framework, error) {
estimateReplicasPluginsList := reflect.ValueOf(&f.estimateReplicasPlugins).Elem()
estimateReplicasType := estimateReplicasPluginsList.Type().Elem()

estimateComponentsPluginsList := reflect.ValueOf(&f.estimateComponentsPlugins).Elem()
estimateComponentsType := estimateComponentsPluginsList.Type().Elem()

for name, factory := range r {
p, err := factory(f)
if err != nil {
return nil, fmt.Errorf("failed to initialize plugin %q: %w", name, err)
}
addPluginToList(p, estimateReplicasType, &estimateReplicasPluginsList)
addPluginToList(p, estimateComponentsType, &estimateComponentsPluginsList)
}
return f, nil
}
Expand Down Expand Up @@ -119,7 +128,10 @@ func (frw *frameworkImpl) SharedInformerFactory() informers.SharedInformerFactor
func (frw *frameworkImpl) RunEstimateReplicasPlugins(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *framework.Result) {
startTime := time.Now()
defer func() {
// Emit metrics with both old and new labels for backward compatibility
// TODO: Remove estimator label in a future release (deprecated)
metrics.FrameworkExtensionPointDuration.WithLabelValues(estimator).Observe(utilmetrics.DurationInSeconds(startTime))
metrics.FrameworkExtensionPointDuration.WithLabelValues(estimateReplicasExtension).Observe(utilmetrics.DurationInSeconds(startTime))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @RainbowMango ,

To differentiate the metrics of the EstimateReplica and EstimateComponents plugins, I decided to introduce a new metric with an EstimateComponents label, and I plan to replace the old Estimator label with EstimateReplica.

Additionally, to avoid a breaking change, I decided to emit the same metric twice, each time with a different label.
What do you think? I am OK with reusing Estimator, but EstimateReplica might be clearer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay with introducing EstimateReplica to replace Estimator. However, if we proceed with this change, the release note should include an additional section on the deprecation of the existing metrics.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK with this approach of introducing a new metric and deprecating the old one.

+1 on the release note

In addition, I realized both the old and new label names are written in camel case, which is not recommended.
I couldn't find any documentation about the naming rule, but there is a metrics linter in the Prometheus project:
https://github.com/RainbowMango/client_golang/blob/e729ba11961abb3a50910c324221d06a35bdce4f/prometheus/testutil/promlint/validations/generic_name_validations.go#L74-L89

PS: I introduced this tool to Kubernetes several years ago, but I didn't get a chance to do the same thing in Karmada. I think Karmada should adopt this tool as well.

Copy link
Contributor Author

@seanlaii seanlaii Oct 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the info! I updated the label values to snake_case and updated the release note.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition, I realized both the old and new label names are written in camel case, which is not recommended.

Hey, I need to correct my previous comment about the label format. According to the validation tool I linked above, only the label name has to be written in snake_case. What we are discussing here is the label value, and I don't see any rules for that.

I see you updated the label value to snake_case, which also looks fine to me.

}()
var replica int32 = math.MaxInt32
results := make(framework.PluginToResult)
Expand All @@ -141,6 +153,42 @@ func (frw *frameworkImpl) runEstimateReplicasPlugins(
) (int32, *framework.Result) {
startTime := time.Now()
replica, ret := pl.Estimate(ctx, snapshot, replicaRequirements)
// Emit metrics with both old and new labels for backward compatibility
// TODO: Remove estimator label in a future release (deprecated)
metrics.PluginExecutionDuration.WithLabelValues(pl.Name(), estimator).Observe(utilmetrics.DurationInSeconds(startTime))
metrics.PluginExecutionDuration.WithLabelValues(pl.Name(), estimateReplicasExtension).Observe(utilmetrics.DurationInSeconds(startTime))
return replica, ret
}

// RunEstimateComponentsPlugins invokes every configured EstimateComponentsPlugin with the given
// components to estimate the maximum number of complete component sets.
// It returns an integer and a Result.
// The integer is the smallest estimate among the plugins; it starts at math.MaxInt32 and is left
// at that value when no plugin contributes an estimate.
// The Result is the merge of the Results returned by all plugins.
// The duration of the whole call is observed under the estimate_components extension point label.
func (frw *frameworkImpl) RunEstimateComponentsPlugins(ctx context.Context, snapshot *schedcache.Snapshot, components []pb.Component) (int32, *framework.Result) {
	begin := time.Now()
	defer func() {
		observer := metrics.FrameworkExtensionPointDuration.WithLabelValues(estimateComponentsExtension)
		observer.Observe(utilmetrics.DurationInSeconds(begin))
	}()

	minSets := int32(math.MaxInt32)
	pluginResults := framework.PluginToResult{}
	for _, plugin := range frw.estimateComponentsPlugins {
		estimate, res := frw.runEstimateComponentsPlugins(ctx, plugin, snapshot, components)
		// Estimates from plugins that neither succeeded nor reported unschedulable are ignored.
		usable := res.IsSuccess() || res.IsUnschedulable()
		if usable && estimate < minSets {
			minSets = estimate
		}
		pluginResults[plugin.Name()] = res
	}
	return minSets, pluginResults.Merge()
}

// runEstimateComponentsPlugins calls EstimateComponents on a single plugin and observes how long
// the call took, labelled with the plugin name and the estimate_components extension point.
// The plugin's estimate and Result are returned unchanged.
func (frw *frameworkImpl) runEstimateComponentsPlugins(
	ctx context.Context,
	pl framework.EstimateComponentsPlugin,
	snapshot *schedcache.Snapshot,
	components []pb.Component,
) (int32, *framework.Result) {
	begin := time.Now()
	estimate, res := pl.EstimateComponents(ctx, snapshot, components)

	observer := metrics.PluginExecutionDuration.WithLabelValues(pl.Name(), estimateComponentsExtension)
	observer.Observe(utilmetrics.DurationInSeconds(begin))
	return estimate, res
}
Loading