Skip to content

Commit

Permalink
Introduce a mechanism to scheduler to actively trigger rescheduling
Browse files Browse the repository at this point in the history
Signed-off-by: chaosi-zju <[email protected]>
  • Loading branch information
chaosi-zju committed Apr 20, 2024
1 parent fdad87e commit d028c18
Show file tree
Hide file tree
Showing 15 changed files with 403 additions and 223 deletions.
8 changes: 8 additions & 0 deletions api/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -19151,6 +19151,10 @@
"$ref": "#/definitions/com.github.karmada-io.karmada.pkg.apis.work.v1alpha2.BindingSnapshot"
}
},
"rescheduleTriggeredAt": {
"description": "RescheduleTriggeredAt is a timestamp representing when the referenced resource is triggered rescheduling. When this field is updated, it means a rescheduling is manually triggered by user, and the expected behavior of this action is to do a complete recalculation without referring to last scheduling results. It works with the status.lastScheduledTime field, and only when this timestamp is later than timestamp in status.lastScheduledTime will the rescheduling actually execute, otherwise, ignored.\n\nIt is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.",
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time"
},
"resource": {
"description": "Resource represents the Kubernetes resource to be propagated.",
"default": {},
Expand Down Expand Up @@ -19182,6 +19186,10 @@
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Condition"
}
},
"lastScheduledTime": {
"description": "LastScheduledTime representing the latest timestamp when scheduler successfully finished a scheduling. It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.",
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time"
},
"schedulerObservedGeneration": {
"description": "SchedulerObservedGeneration is the generation(.metadata.generation) observed by the scheduler. If SchedulerObservedGeneration is less than the generation in metadata means the scheduler hasn't confirmed the scheduling result or hasn't done the schedule yet.",
"type": "integer",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,18 @@ spec:
- name
type: object
type: array
rescheduleTriggeredAt:
description: "RescheduleTriggeredAt is a timestamp representing when
the referenced resource is triggered rescheduling. When this field
is updated, it means a rescheduling is manually triggered by user,
and the expected behavior of this action is to do a complete recalculation
without referring to last scheduling results. It works with the
status.lastScheduledTime field, and only when this timestamp is
later than timestamp in status.lastScheduledTime will the rescheduling
actually execute, otherwise, ignored. \n It is represented in RFC3339
form (like '2006-01-02T15:04:05Z') and is in UTC."
format: date-time
type: string
resource:
description: Resource represents the Kubernetes resource to be propagated.
properties:
Expand Down Expand Up @@ -1279,6 +1291,12 @@ spec:
- type
type: object
type: array
lastScheduledTime:
description: LastScheduledTime representing the latest timestamp when
scheduler successfully finished a scheduling. It is represented
in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
format: date-time
type: string
schedulerObservedGeneration:
description: SchedulerObservedGeneration is the generation(.metadata.generation)
observed by the scheduler. If SchedulerObservedGeneration is less
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,18 @@ spec:
- name
type: object
type: array
rescheduleTriggeredAt:
description: "RescheduleTriggeredAt is a timestamp representing when
the referenced resource is triggered rescheduling. When this field
is updated, it means a rescheduling is manually triggered by user,
and the expected behavior of this action is to do a complete recalculation
without referring to last scheduling results. It works with the
status.lastScheduledTime field, and only when this timestamp is
later than timestamp in status.lastScheduledTime will the rescheduling
actually execute, otherwise, ignored. \n It is represented in RFC3339
form (like '2006-01-02T15:04:05Z') and is in UTC."
format: date-time
type: string
resource:
description: Resource represents the Kubernetes resource to be propagated.
properties:
Expand Down Expand Up @@ -1279,6 +1291,12 @@ spec:
- type
type: object
type: array
lastScheduledTime:
description: LastScheduledTime representing the latest timestamp when
scheduler successfully finished a scheduling. It is represented
in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
format: date-time
type: string
schedulerObservedGeneration:
description: SchedulerObservedGeneration is the generation(.metadata.generation)
observed by the scheduler. If SchedulerObservedGeneration is less
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/work/v1alpha2/binding_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ type ResourceBindingSpec struct {
// +kubebuilder:validation:Enum=Abort;Overwrite
// +optional
ConflictResolution policyv1alpha1.ConflictResolution `json:"conflictResolution,omitempty"`

// RescheduleTriggeredAt is a timestamp representing when the referenced resource is triggered rescheduling.
// When this field is updated, it means a rescheduling is manually triggered by user, and the expected behavior
// of this action is to do a complete recalculation without referring to last scheduling results.
// It works with the status.lastScheduledTime field, and only when this timestamp is later than timestamp in
// status.lastScheduledTime will the rescheduling actually execute, otherwise, ignored.
//
// It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
// +optional
RescheduleTriggeredAt *metav1.Time `json:"rescheduleTriggeredAt,omitempty"`
}

// ObjectReference contains enough information to locate the referenced object inside current cluster.
Expand Down Expand Up @@ -297,6 +307,11 @@ type ResourceBindingStatus struct {
// +optional
SchedulerObservedAffinityName string `json:"schedulerObservingAffinityName,omitempty"`

// LastScheduledTime represents the latest timestamp when scheduler successfully finished a scheduling.
// It is represented in RFC3339 form (like '2006-01-02T15:04:05Z') and is in UTC.
// +optional
LastScheduledTime *metav1.Time `json:"lastScheduledTime,omitempty"`

// Conditions contain the different condition statuses.
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`
Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/work/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 14 additions & 2 deletions pkg/generated/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 38 additions & 5 deletions pkg/scheduler/core/assignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ const (
DynamicWeightStrategy = "DynamicWeight"
)

// scheduleMode represents the calculation mode when scheduling.
type scheduleMode string

const (
// Lazy means that the last scheduling result will be referenced and maintained as much as possible.
Lazy scheduleMode = "Lazy"

// Shuffled means a complete recalculation without referring to the last scheduling results.
Shuffled scheduleMode = "Shuffled"
)

// assignState is a wrapper of the input for assigning function.
type assignState struct {
candidates []*clusterv1alpha1.Cluster
Expand All @@ -58,6 +68,9 @@ type assignState struct {
// fields below are indirect results
strategyType string

// scheduleMode represents the calculation mode when scheduling
scheduleMode scheduleMode

scheduledClusters []workv1alpha2.TargetCluster
assignedReplicas int32
availableClusters []workv1alpha2.TargetCluster
Expand All @@ -67,26 +80,35 @@ type assignState struct {
targetReplicas int32
}

func newAssignState(candidates []*clusterv1alpha1.Cluster, placement *policyv1alpha1.Placement, obj *workv1alpha2.ResourceBindingSpec) *assignState {
func newAssignState(candidates []*clusterv1alpha1.Cluster, spec *workv1alpha2.ResourceBindingSpec,
status *workv1alpha2.ResourceBindingStatus) *assignState {
var strategyType string

switch placement.ReplicaSchedulingType() {
switch spec.Placement.ReplicaSchedulingType() {
case policyv1alpha1.ReplicaSchedulingTypeDuplicated:
strategyType = DuplicatedStrategy
case policyv1alpha1.ReplicaSchedulingTypeDivided:
switch placement.ReplicaScheduling.ReplicaDivisionPreference {
switch spec.Placement.ReplicaScheduling.ReplicaDivisionPreference {
case policyv1alpha1.ReplicaDivisionPreferenceAggregated:
strategyType = AggregatedStrategy
case policyv1alpha1.ReplicaDivisionPreferenceWeighted:
if placement.ReplicaScheduling.WeightPreference != nil && len(placement.ReplicaScheduling.WeightPreference.DynamicWeight) != 0 {
if spec.Placement.ReplicaScheduling.WeightPreference != nil && len(spec.Placement.ReplicaScheduling.WeightPreference.DynamicWeight) != 0 {
strategyType = DynamicWeightStrategy
} else {
strategyType = StaticWeightStrategy
}
}
}

return &assignState{candidates: candidates, strategy: placement.ReplicaScheduling, spec: obj, strategyType: strategyType}
// the schedule mode defaults to Lazy to minimize large changes in scheduling results.
expectScheduleMode := Lazy
// when spec.rescheduleTriggeredAt is updated, it means a rescheduling is manually triggered by user, and the
// expected behavior of this action is to do a complete recalculation without referring to last scheduling results.
if util.RescheduleTriggeredAfterLastSchedule(spec.RescheduleTriggeredAt, status.LastScheduledTime) {
expectScheduleMode = Shuffled
}

return &assignState{candidates: candidates, strategy: spec.Placement.ReplicaScheduling, spec: spec, strategyType: strategyType, scheduleMode: expectScheduleMode}
}

func (as *assignState) buildScheduledClusters() {
Expand Down Expand Up @@ -179,6 +201,17 @@ func assignByStaticWeightStrategy(state *assignState) ([]workv1alpha2.TargetClus

func assignByDynamicStrategy(state *assignState) ([]workv1alpha2.TargetCluster, error) {
state.buildScheduledClusters()

// 1. when shuffled mode is expected, do a complete recalculation without referring to the last scheduling results.
if state.scheduleMode == Shuffled {
result, err := dynamicShuffledScale(state)
if err != nil {
return nil, fmt.Errorf("failed to do shuffled scale: %v", err)
}
return result, nil
}

// 2. when lazy mode is expected, try minimizing large changes in scheduling results.
if state.assignedReplicas > state.spec.Replicas {
// We need to reduce the replicas in terms of the previous result.
result, err := dynamicScaleDown(state)
Expand Down
Loading

0 comments on commit d028c18

Please sign in to comment.