More power management updates for 5.12-rc1

- Address cpufreq regression introduced in 5.11 that causes CPU
  frequency reporting to be distorted on systems with CPPC that use
  acpi-cpufreq as the scaling driver (Rafael Wysocki).

- Fix regression introduced during the 5.10 development cycle related
  to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver
  (Shawn Guo).

- Fix recent regression in the operating performance points (OPP)
  framework that may cause frequency updates to be skipped by mistake
  in some cases (Jonathan Marek).

- Simplify schedutil governor code and remove a misleading comment
  from it (Yue Hu).

- Fix kerneldoc comment typo in the cpufreq core (Yue Hu).

-----BEGIN PGP SIGNATURE-----

iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmA1UtMSHHJqd0Byand5
c29ja2kubmV0AAoJEILEb/54YlRxezIP/2oBj9fFBSLEB6NL24hO1O7Te2Jbdmpq
RZbGu712eVeB+2dp7jApofwIaBuqRIB9gZBPwyIpEl9c4PbvQ8xARBfxUTBneWuG
0+y8t9YDHnTxTz2erh6/OkbCEfqijnpWqHtt9A5OiFvPT2zyjCRZ2W/+UJ66QF+O
Dl79CyiDotwbMlZnYGTJSxRTia4OFT3U9qc5H0KBCDIWKCE47XpwnLDAuPu9ClY+
YW3Tp58yc/3eRcYIexjovmHN/TAF6yFMhVX2q/EGdmAraMM5+bQvymbjtA5LvQlj
q68wSRa92KBxf+VVQf3Bv9gyFCgfZLz3lYSRCf/xKs4xcsA3j1PdV8QGO15rFtuG
paJ+T74YAzOm4ntihU+QusCJwYpXMn87BKpCEdsVkV3bJLNWlC/9wDwlXgNvOi+0
/pzNGSCfJjyG6vXb5G2WC+iDLX1BKdLS3+adCzfMHgU2dS3kUjCUDDA400xYmW/B
yNpjU6hUOqNLA2LWRgteuKP/psjJEQH6mwWWXuXsjFf+wGCHIc0U2t73LYR+JCgZ
K43VsxIu2J7QWjSV9Nzff1yVNpJBlMnXr0jVQuvHh9Rkc4qvk2yU0SHEeuCXexFL
rcapniJ3/1DbBK93+1ObENjbtq4XF/1FQhNRhcQew7Do54NmjuGRc1lEu+q3hbcs
5Gldg/M97C62
=PT0e
-----END PGP SIGNATURE-----

Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
 "These are fixes and cleanups on top of the power management material
  for 5.12-rc1 merged previously.

  Specifics:

   - Address cpufreq regression introduced in 5.11 that causes CPU
     frequency reporting to be distorted on systems with CPPC that use
     acpi-cpufreq as the scaling driver (Rafael Wysocki).

   - Fix regression introduced during the 5.10 development cycle
     related to CPU hotplug and policy recreation in the
     qcom-cpufreq-hw driver (Shawn Guo).

   - Fix recent regression in the operating performance points (OPP)
     framework that may cause frequency updates to be skipped by
     mistake in some cases (Jonathan Marek).

   - Simplify schedutil governor code and remove a misleading comment
     from it (Yue Hu).

   - Fix kerneldoc comment typo in the cpufreq core (Yue Hu)"

* tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Fix typo in kerneldoc comment
  cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition
  cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit()
  cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known
  cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
  opp: Don't skip freq update for different frequency
2024-10-03 15:47:36 +00:00 · 2021-02-23 14:59:46 -08:00 · 2021-02-23 14:59:46 -08:00 · 005d3bd9e3
commit 005d3bd9e3
parent e0fbd25bb3 08c2a406b9
7 changed files with 69 additions and 67 deletions
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@ -54,7 +54,6 @@ struct acpi_cpufreq_data {
 	unsigned int resume;
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
 	unsigned int first_perf_state;
 	cpumask_var_t freqdomain_cpus;
 	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
 	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
 	perf = to_perf_data(data);
-	cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
+	cpufreq_for_each_entry(pos, policy->freq_table)
 		if (msr == perf->states[pos->driver_data].status)
 			return pos->frequency;
-	return policy->freq_table[data->first_perf_state].frequency;
+	return policy->freq_table[0].frequency;
 }
 static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	struct cpufreq_policy *policy;
 	unsigned int freq;
 	unsigned int cached_freq;
 	unsigned int state;
 	pr_debug("%s (%d)\n", __func__, cpu);
@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	if (unlikely(!data || !policy->freq_table))
 		return 0;
-	state = to_perf_data(data)->state;
+	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
 	if (state < data->first_perf_state)
 		state = data->first_perf_state;
 	cached_freq = policy->freq_table[state].frequency;
 	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
 	if (freq != cached_freq) {
 		/*
@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	unsigned int valid_states = 0;
 	unsigned int result = 0;
 	unsigned int state_count;
 	u64 max_boost_ratio;
 	unsigned int i;
 #ifdef CONFIG_SMP
@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_unreg;
 	}
-	state_count = perf->state_count + 1;
+	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
-
+			     GFP_KERNEL);
 	max_boost_ratio = get_max_boost_ratio(cpu);
 	if (max_boost_ratio) {
 		/*
 		 * Make a room for one more entry to represent the highest
 		 * available "boost" frequency.
 		 */
 		state_count++;
 		valid_states++;
 		data->first_perf_state = valid_states;
 	} else {
 		/*
 		 * If the maximum "boost" frequency is unknown, ask the arch
 		 * scale-invariance code to use the "nominal" performance for
 		 * CPU utilization scaling so as to prevent the schedutil
 		 * governor from selecting inadequate CPU frequencies.
 		 */
 		arch_set_max_freq_ratio(true);
 	}
 	freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
 	if (!freq_table) {
 		result = -ENOMEM;
 		goto err_unreg;
@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
 	max_boost_ratio = get_max_boost_ratio(cpu);
 	if (max_boost_ratio) {
-		unsigned int state = data->first_perf_state;
+		unsigned int freq = freq_table[0].frequency;
 		unsigned int freq = freq_table[state].frequency;
 		/*
 		 * Because the loop above sorts the freq_table entries in the
 		 * descending order, freq is the maximum frequency in the table.
 		 * Assume that it corresponds to the CPPC nominal frequency and
-		 * use it to populate the frequency field of the extra "boost"
+		 * use it to set cpuinfo.max_freq.
 		 * frequency entry.
 		 */
-		freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
+		policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
 	} else {
 		/*
-		 * The purpose of the extra "boost" frequency entry is to make
+		 * If the maximum "boost" frequency is unknown, ask the arch
-		 * the rest of cpufreq aware of the real maximum frequency, but
+		 * scale-invariance code to use the "nominal" performance for
-		 * the way to request it is the same as for the first_perf_state
+		 * CPU utilization scaling so as to prevent the schedutil
-		 * entry that is expected to cover the entire range of "boost"
+		 * governor from selecting inadequate CPU frequencies.
 		 * frequencies of the CPU, so copy the driver_data value from
 		 * that entry.
 		 */
-		freq_table[0].driver_data = freq_table[state].driver_data;
+		arch_set_max_freq_ratio(true);
 	}
 	policy->freq_table = freq_table;
@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
 {
 	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
 							      policy->cpu);
-	struct acpi_cpufreq_data *data = policy->driver_data;
+	unsigned int freq = policy->freq_table[0].frequency;
 	unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
 	if (perf->states[0].core_frequency * 1000 != freq)
 		pr_warn(FW_WARN "P-state 0 is not max freq\n");
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@ -2101,7 +2101,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
 * @cpu: Target CPU.
 * @min_perf: Minimum (required) performance level (units of @capacity).
- * @target_perf: Terget (desired) performance level (units of @capacity).
+ * @target_perf: Target (desired) performance level (units of @capacity).
 * @capacity: Capacity of the target CPU.
 *
 * Carry out a fast performance level switch of @cpu without sleeping.
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@ -52,7 +52,13 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 	}
 	policy->min = policy->cpuinfo.min_freq = min_freq;
-	policy->max = policy->cpuinfo.max_freq = max_freq;
+	policy->max = max_freq;
 	/*
 	 * If the driver has set its own cpuinfo.max_freq above max_freq, leave
 	 * it as is.
 	 */
 	if (policy->cpuinfo.max_freq < max_freq)
 		policy->max = policy->cpuinfo.max_freq = max_freq;
 	if (policy->min == ~0)
 		return -EINVAL;
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
 struct qcom_cpufreq_data {
 	void __iomem *base;
 	struct resource *res;
 	const struct qcom_cpufreq_soc_data *soc_data;
 };
@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 	struct of_phandle_args args;
 	struct device_node *cpu_np;
 	struct device *cpu_dev;
 	struct resource *res;
 	void __iomem *base;
 	struct qcom_cpufreq_data *data;
 	int ret, index;
@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 	index = args.args[0];
-	base = devm_platform_ioremap_resource(pdev, index);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
-	if (IS_ERR(base))
+	if (!res) {
-		return PTR_ERR(base);
+		dev_err(dev, "failed to get mem resource %d\n", index);
 		return -ENODEV;
 	}
-	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!request_mem_region(res->start, resource_size(res), res->name)) {
 		dev_err(dev, "failed to request resource %pR\n", res);
 		return -EBUSY;
 	}
 	base = ioremap(res->start, resource_size(res));
 	if (IS_ERR(base)) {
 		dev_err(dev, "failed to map resource %pR\n", res);
 		ret = PTR_ERR(base);
 		goto release_region;
 	}
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data) {
 		ret = -ENOMEM;
-		goto error;
+		goto unmap_base;
 	}
 	data->soc_data = of_device_get_match_data(&pdev->dev);
 	data->base = base;
 	data->res = res;
 	/* HW should be in enabled state to proceed */
 	if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
@ -355,7 +372,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 	return 0;
 error:
-	devm_iounmap(dev, base);
+	kfree(data);
 unmap_base:
 	iounmap(data->base);
 release_region:
 	release_mem_region(res->start, resource_size(res));
 	return ret;
 }
@ -363,12 +384,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct device *cpu_dev = get_cpu_device(policy->cpu);
 	struct qcom_cpufreq_data *data = policy->driver_data;
-	struct platform_device *pdev = cpufreq_get_driver_data();
+	struct resource *res = data->res;
 	void __iomem *base = data->base;
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	kfree(policy->freq_table);
-	devm_iounmap(&pdev->dev, data->base);
+	kfree(data);
 	iounmap(base);
 	release_mem_region(res->start, resource_size(res));
 	return 0;
 }
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@ -998,14 +998,15 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
 	old_opp = opp_table->current_opp;
 	/* Return early if nothing to do */
-	if (opp_table->enabled && old_opp == opp) {
+	if (old_opp == opp && opp_table->current_rate == freq &&
 	    opp_table->enabled) {
 		dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__);
 		return 0;
 	}
 	dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n",
-		__func__, old_opp->rate, freq, old_opp->level, opp->level,
+		__func__, opp_table->current_rate, freq, old_opp->level,
-		old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
+		opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
 		opp->bandwidth ? opp->bandwidth[0].peak : 0);
 	scaling_down = _opp_compare_key(old_opp, opp);
@ -1061,6 +1062,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
 	/* Make sure current_opp doesn't get freed */
 	dev_pm_opp_get(opp);
 	opp_table->current_opp = opp;
 	opp_table->current_rate = freq;
 	return ret;
 }
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@ -135,6 +135,7 @@ enum opp_table_access {
 * @clock_latency_ns_max: Max clock latency in nanoseconds.
 * @parsed_static_opps: Count of devices for which OPPs are initialized from DT.
 * @shared_opp: OPP is shared between multiple devices.
 * @current_rate: Currently configured frequency.
 * @current_opp: Currently configured OPP for the table.
 * @suspend_opp: Pointer to OPP to be used during device suspend.
 * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
@ -184,6 +185,7 @@ struct opp_table {
 	unsigned int parsed_static_opps;
 	enum opp_table_access shared_opp;
 	unsigned long current_rate;
 	struct dev_pm_opp *current_opp;
 	struct dev_pm_opp *suspend_opp;
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@ -26,7 +26,7 @@ struct sugov_policy {
 	struct sugov_tunables	*tunables;
 	struct list_head	tunables_hook;
-	raw_spinlock_t		update_lock;	/* For shared policies */
+	raw_spinlock_t		update_lock;
 	u64			last_freq_update_time;
 	s64			freq_update_delay_ns;
 	unsigned int		next_freq;
@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
-static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
+static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
 {
 	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
-		sg_policy->limits_changed = true;
+		sg_cpu->sg_policy->limits_changed = true;
 }
 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
 					      u64 time, unsigned int flags)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
-	ignore_dl_rate_limit(sg_cpu, sg_policy);
+	ignore_dl_rate_limit(sg_cpu);
-	if (!sugov_should_update_freq(sg_policy, time))
+	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
 		return false;
 	sugov_get_util(sg_cpu);
@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
-	ignore_dl_rate_limit(sg_cpu, sg_policy);
+	ignore_dl_rate_limit(sg_cpu);
 	if (sugov_should_update_freq(sg_policy, time)) {
 		next_f = sugov_next_freq_shared(sg_cpu, time);