qos: Speed up pm_qos_set_value_for_cpus()

A lot of unnecessary work is done in pm_qos_set_value_for_cpus(),
especially when the request being updated isn't affined to all CPUs.
We can reduce the work done here significantly by only inspecting the
CPUs which are affected by the updated request, and bailing out if the
updated request doesn't change anything.
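As a minimal illustration of that bail-out, here is a standalone userspace sketch; target_per_cpu and the mask encoding are simplified stand-ins for the kernel's types, not the patch itself:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Simplified stand-in for the kernel's c->target_per_cpu[]. */
static int target_per_cpu[NR_CPUS] = { [0 ... (NR_CPUS - 1)] = 100 };

/*
 * Mirror of the new fast path: scan only the CPUs in the request's
 * affinity mask and report whether the request's value differs from
 * any of their current targets. If it doesn't, the caller can return
 * early instead of re-aggregating every outstanding request.
 */
static bool request_may_change_targets(unsigned long affine_mask, int prio)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++) {
        if (!(affine_mask & (1UL << cpu)))
            continue; /* request is not affined to this CPU */
        if (target_per_cpu[cpu] != prio)
            return true;
    }
    return false;
}

int main(void)
{
    /* Request affined to CPUs 0-1 whose value matches the targets. */
    printf("%d\n", request_may_change_targets(0x3UL, 100)); /* 0: bail out */
    /* Same CPUs with a new value: the full update has to run. */
    printf("%d\n", request_may_change_targets(0x3UL, 50));  /* 1 */
    return 0;
}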

We can make some other micro-optimizations as well knowing that this
code is only for the PM_QOS_CPU_DMA_LATENCY class.
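One such micro-optimization is visible in the diff below: PM_QOS_CPU_DMA_LATENCY is a PM_QOS_MIN class, so the per-CPU aggregation can drop the per-request switch on c->type. A simplified before/after sketch (illustrative signatures, not the kernel's):

enum pm_qos_type { PM_QOS_MIN, PM_QOS_MAX, PM_QOS_SUM };

/* Before: every iteration re-dispatched on the constraint type. */
int aggregate_by_type(enum pm_qos_type type, int acc, int prio)
{
    switch (type) {
    case PM_QOS_MIN:
        return prio < acc ? prio : acc;
    case PM_QOS_MAX:
        return prio > acc ? prio : acc;
    case PM_QOS_SUM:
        return acc + prio;
    default:
        return acc;
    }
}

/*
 * After: the function already returns -EINVAL for every class other
 * than CPU_DMA_LATENCY, and that class is a MIN constraint, so the
 * switch collapses to one comparison per CPU.
 */
int aggregate_min(int acc, int prio)
{
    return prio < acc ? prio : acc;
}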

Signed-off-by: Sultan Alsawaf <[email protected]>
kerneltoast authored and pascua28 committed Feb 20, 2025
1 parent a348c42 commit b770bfb
Showing 1 changed file with 34 additions and 25 deletions.
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -282,12 +282,17 @@ static const struct file_operations pm_qos_debug_fops = {
     .release = single_release,
 };
 
-static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
+static inline int pm_qos_set_value_for_cpus(struct pm_qos_request *new_req,
+                                            struct pm_qos_constraints *c,
                                             unsigned long *cpus)
 {
-    struct pm_qos_request *req = NULL;
-    s32 qos_val[NR_CPUS] = {
-        [0 ... (NR_CPUS - 1)] = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE
-    };
+    struct pm_qos_request *req;
+    unsigned long new_req_cpus;
+    bool changed = false;
     int cpu;
+    s32 qos_val[NR_CPUS] = { [0 ... (NR_CPUS - 1)] = c->default_value };
 
     /*
      * pm_qos_constraints can be from different classes,
@@ -297,32 +302,35 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
     if (c != pm_qos_array[PM_QOS_CPU_DMA_LATENCY]->constraints)
         return -EINVAL;
 
+    new_req_cpus = atomic_read(&new_req->cpus_affine);
+    for_each_cpu(cpu, to_cpumask(&new_req_cpus)) {
+        if (c->target_per_cpu[cpu] != new_req->node.prio) {
+            changed = true;
+            break;
+        }
+    }
+
+    if (!changed)
+        return 0;
+
     plist_for_each_entry(req, &c->list, node) {
-        unsigned long affined_cpus = atomic_read(&req->cpus_affine);
-
-        for_each_cpu(cpu, to_cpumask(&affined_cpus)) {
-            switch (c->type) {
-            case PM_QOS_MIN:
-                if (qos_val[cpu] > req->node.prio)
-                    qos_val[cpu] = req->node.prio;
-                break;
-            case PM_QOS_MAX:
-                if (req->node.prio > qos_val[cpu])
-                    qos_val[cpu] = req->node.prio;
-                break;
-            case PM_QOS_SUM:
-                qos_val[cpu] += req->node.prio;
-                break;
-            default:
-                break;
-            }
+        unsigned long affected_cpus;
+
+        affected_cpus = atomic_read(&req->cpus_affine) & new_req_cpus;
+        if (!affected_cpus)
+            continue;
+
+        for_each_cpu(cpu, to_cpumask(&affected_cpus)) {
+            if (qos_val[cpu] > req->node.prio)
+                qos_val[cpu] = req->node.prio;
         }
     }
 
-    for_each_possible_cpu(cpu) {
-        if (c->target_per_cpu[cpu] != qos_val[cpu])
+    for_each_cpu(cpu, to_cpumask(&new_req_cpus)) {
+        if (c->target_per_cpu[cpu] != qos_val[cpu]) {
+            c->target_per_cpu[cpu] = qos_val[cpu];
             *cpus |= BIT(cpu);
-            c->target_per_cpu[cpu] = qos_val[cpu];
+        }
     }
 
     return 0;
Expand All @@ -342,6 +350,7 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
int __always_inline pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
enum pm_qos_req_action action, int value)
{
struct pm_qos_request *req = container_of(node, typeof(*req), node);
int prev_value, curr_value, new_value;
unsigned long cpus = 0;
int ret;
@@ -375,7 +384,7 @@ int __always_inline pm_qos_update_target(struct pm_qos_constraints *c, struct pl
 
     curr_value = pm_qos_get_value(c);
     pm_qos_set_value(c, curr_value);
-    ret = pm_qos_set_value_for_cpus(c, &cpus);
+    ret = pm_qos_set_value_for_cpus(req, c, &cpus);
 
     spin_unlock(&pm_qos_lock);

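A note on the second hunk above: the affinity masks are plain unsigned long bitmaps with one bit per CPU (the patch's existing use of BIT(cpu) and unsigned long implies NR_CPUS <= BITS_PER_LONG here), so narrowing each request to the CPUs it shares with the updated request costs a single AND. A standalone sketch of that filtering step:

#include <stdio.h>

int main(void)
{
    unsigned long new_req_cpus = 0x0fUL; /* updated request: CPUs 0-3 */
    unsigned long req_affine = 0x3cUL;   /* some other request: CPUs 2-5 */

    /*
     * Mirrors "atomic_read(&req->cpus_affine) & new_req_cpus": only the
     * CPUs both masks share need their QoS value re-aggregated, and a
     * zero result lets the loop skip the request entirely.
     */
    unsigned long affected_cpus = req_affine & new_req_cpus;

    printf("affected mask: %#lx\n", affected_cpus); /* 0xc: CPUs 2 and 3 */
    return 0;
}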