Skip to content

Commit

Permalink
Use node fields instead of custom queuing algorithm to count children…
Browse files Browse the repository at this point in the history
… checked (#9154)

* Use node fields instead of custom queuing algorithm to count children checked

* Fix subtle bug introduced by defining leaf nodes

* add Franco's patch to benchmark test
  • Loading branch information
chencs authored Sep 2, 2024
1 parent c715b0a commit 0084380
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,6 @@ func TestMultiDimensionalQueueAlgorithmSlowConsumerEffects(t *testing.T) {
},
}

maxQueriersPerTenant := 0 // disable shuffle sharding

// Increase totalRequests to tighten up variations when running locally, but do not commit higher values;
// the later test cases with a higher percentage of slow queries will take a long time to run.
totalRequests := 1000
Expand All @@ -365,6 +363,12 @@ func TestMultiDimensionalQueueAlgorithmSlowConsumerEffects(t *testing.T) {
// an ingester can respond in 0.3 seconds while a slow store-gateway query can take 30 seconds
slowConsumerLatency := 100 * time.Millisecond

// enable shuffle sharding; we cannot be too restrictive with only two tenants,
// or some consumers will not get sharded to any of the two tenants.
// enabling shuffle sharding ensures we will hit cases where a querier-worker
// does not find any tenant leaf nodes it can work on under its prioritized query component node
maxQueriersPerTenant := numConsumers - 1

var testCaseNames []string
testCaseReports := map[string]*testScenarioQueueDurationReport{}

Expand Down
4 changes: 1 addition & 3 deletions pkg/scheduler/queue/tenant_querier_assignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ func (tqa *tenantQuerierAssignments) dequeueSelectNode(node *Node) (*Node, bool)
return nil, true
}

checkedAllNodes := node.childrenChecked == len(node.queueMap)+1 // must check local queue as well
checkedAllNodes := node.childrenChecked == len(node.queueMap)

// advance queue position for dequeue
tqa.tenantOrderIndex++
Expand Down Expand Up @@ -484,8 +484,6 @@ func (tqa *tenantQuerierAssignments) dequeueSelectNode(node *Node) (*Node, bool)
continue
}

checkedAllNodes = node.childrenChecked == len(node.queueMap)+1

// if the tenant-querier set is nil, any querier can serve this tenant
if tqa.tenantQuerierIDs[tenantID] == nil {
tqa.tenantOrderIndex = checkIndex
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ type QuerierWorkerQueuePriorityAlgo struct {
currentNodeOrderIndex int
nodeOrder []string
nodeCounts map[string]int
nodesChecked int
}

func NewQuerierWorkerQueuePriorityAlgo() *QuerierWorkerQueuePriorityAlgo {
Expand Down Expand Up @@ -87,8 +86,8 @@ func (qa *QuerierWorkerQueuePriorityAlgo) wrapCurrentNodeOrderIndex(increment bo
}
}

func (qa *QuerierWorkerQueuePriorityAlgo) checkedAllNodes() bool {
return qa.nodesChecked == len(qa.nodeOrder)
func (qa *QuerierWorkerQueuePriorityAlgo) checkedAllNodes(n *Node) bool {
return n.childrenChecked == len(n.queueMap)
}

func (qa *QuerierWorkerQueuePriorityAlgo) addChildNode(parent, child *Node) {
Expand Down Expand Up @@ -118,11 +117,11 @@ func (qa *QuerierWorkerQueuePriorityAlgo) addChildNode(parent, child *Node) {

func (qa *QuerierWorkerQueuePriorityAlgo) dequeueSelectNode(node *Node) (*Node, bool) {
currentNodeName := qa.nodeOrder[qa.currentNodeOrderIndex]
if node, ok := node.queueMap[currentNodeName]; ok {
qa.nodesChecked++
return node, qa.checkedAllNodes()
checkedAllNodes := qa.checkedAllNodes(node)
if childNode, ok := node.queueMap[currentNodeName]; ok {
return childNode, checkedAllNodes
}
return nil, qa.checkedAllNodes()
return nil, checkedAllNodes
}

func (qa *QuerierWorkerQueuePriorityAlgo) dequeueUpdateState(node *Node, dequeuedFrom *Node) {
Expand Down Expand Up @@ -155,7 +154,4 @@ func (qa *QuerierWorkerQueuePriorityAlgo) dequeueUpdateState(node *Node, dequeue
// delete child node from its parent's queueMap
delete(node.queueMap, childName)
}

// reset state after successful dequeue
qa.nodesChecked = 0
}

0 comments on commit 0084380

Please sign in to comment.