More power management updates for 5.12-rc1

  - Address cpufreq regression introduced in 5.11 that causes
    CPU frequency reporting to be distorted on systems with CPPC
    that use acpi-cpufreq as the scaling driver (Rafael Wysocki).
 
  - Fix regression introduced during the 5.10 development cycle
    related to CPU hotplug and policy recreation in the
    qcom-cpufreq-hw driver (Shawn Guo).
 
  - Fix recent regression in the operating performance points (OPP)
    framework that may cause frequency updates to be skipped by
    mistake in some cases (Jonathan Marek).
 
  - Simplify schedutil governor code and remove a misleading comment
    from it (Yue Hu).
 
  - Fix kerneldoc comment typo in the cpufreq core (Yue Hu).
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmA1UtMSHHJqd0Byand5
 c29ja2kubmV0AAoJEILEb/54YlRxezIP/2oBj9fFBSLEB6NL24hO1O7Te2Jbdmpq
 RZbGu712eVeB+2dp7jApofwIaBuqRIB9gZBPwyIpEl9c4PbvQ8xARBfxUTBneWuG
 0+y8t9YDHnTxTz2erh6/OkbCEfqijnpWqHtt9A5OiFvPT2zyjCRZ2W/+UJ66QF+O
 Dl79CyiDotwbMlZnYGTJSxRTia4OFT3U9qc5H0KBCDIWKCE47XpwnLDAuPu9ClY+
 YW3Tp58yc/3eRcYIexjovmHN/TAF6yFMhVX2q/EGdmAraMM5+bQvymbjtA5LvQlj
 q68wSRa92KBxf+VVQf3Bv9gyFCgfZLz3lYSRCf/xKs4xcsA3j1PdV8QGO15rFtuG
 paJ+T74YAzOm4ntihU+QusCJwYpXMn87BKpCEdsVkV3bJLNWlC/9wDwlXgNvOi+0
 /pzNGSCfJjyG6vXb5G2WC+iDLX1BKdLS3+adCzfMHgU2dS3kUjCUDDA400xYmW/B
 yNpjU6hUOqNLA2LWRgteuKP/psjJEQH6mwWWXuXsjFf+wGCHIc0U2t73LYR+JCgZ
 K43VsxIu2J7QWjSV9Nzff1yVNpJBlMnXr0jVQuvHh9Rkc4qvk2yU0SHEeuCXexFL
 rcapniJ3/1DbBK93+1ObENjbtq4XF/1FQhNRhcQew7Do54NmjuGRc1lEu+q3hbcs
 5Gldg/M97C62
 =PT0e
 -----END PGP SIGNATURE-----

Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
 "These are fixes and cleanups on top of the power management material
  for 5.12-rc1 merged previously.

  Specifics:

   - Address cpufreq regression introduced in 5.11 that causes CPU
     frequency reporting to be distorted on systems with CPPC that use
     acpi-cpufreq as the scaling driver (Rafael Wysocki).

   - Fix regression introduced during the 5.10 development cycle related
     to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver
     (Shawn Guo).

   - Fix recent regression in the operating performance points (OPP)
     framework that may cause frequency updates to be skipped by mistake
     in some cases (Jonathan Marek).

   - Simplify schedutil governor code and remove a misleading comment
     from it (Yue Hu).

   - Fix kerneldoc comment typo in the cpufreq core (Yue Hu)"

* tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Fix typo in kerneldoc comment
  cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition
  cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit()
  cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known
  cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
  opp: Don't skip freq update for different frequency
This commit is contained in:
Linus Torvalds 2021-02-23 14:59:46 -08:00
commit 005d3bd9e3
7 changed files with 69 additions and 67 deletions

View file

@ -54,7 +54,6 @@ struct acpi_cpufreq_data {
unsigned int resume; unsigned int resume;
unsigned int cpu_feature; unsigned int cpu_feature;
unsigned int acpi_perf_cpu; unsigned int acpi_perf_cpu;
unsigned int first_perf_state;
cpumask_var_t freqdomain_cpus; cpumask_var_t freqdomain_cpus;
void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
u32 (*cpu_freq_read)(struct acpi_pct_register *reg); u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
perf = to_perf_data(data); perf = to_perf_data(data);
cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) cpufreq_for_each_entry(pos, policy->freq_table)
if (msr == perf->states[pos->driver_data].status) if (msr == perf->states[pos->driver_data].status)
return pos->frequency; return pos->frequency;
return policy->freq_table[data->first_perf_state].frequency; return policy->freq_table[0].frequency;
} }
static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
unsigned int freq; unsigned int freq;
unsigned int cached_freq; unsigned int cached_freq;
unsigned int state;
pr_debug("%s (%d)\n", __func__, cpu); pr_debug("%s (%d)\n", __func__, cpu);
@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
if (unlikely(!data || !policy->freq_table)) if (unlikely(!data || !policy->freq_table))
return 0; return 0;
state = to_perf_data(data)->state; cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
if (state < data->first_perf_state)
state = data->first_perf_state;
cached_freq = policy->freq_table[state].frequency;
freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
if (freq != cached_freq) { if (freq != cached_freq) {
/* /*
@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *c = &cpu_data(cpu);
unsigned int valid_states = 0; unsigned int valid_states = 0;
unsigned int result = 0; unsigned int result = 0;
unsigned int state_count;
u64 max_boost_ratio; u64 max_boost_ratio;
unsigned int i; unsigned int i;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto err_unreg; goto err_unreg;
} }
state_count = perf->state_count + 1; freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
GFP_KERNEL);
max_boost_ratio = get_max_boost_ratio(cpu);
if (max_boost_ratio) {
/*
* Make a room for one more entry to represent the highest
* available "boost" frequency.
*/
state_count++;
valid_states++;
data->first_perf_state = valid_states;
} else {
/*
* If the maximum "boost" frequency is unknown, ask the arch
* scale-invariance code to use the "nominal" performance for
* CPU utilization scaling so as to prevent the schedutil
* governor from selecting inadequate CPU frequencies.
*/
arch_set_max_freq_ratio(true);
}
freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
if (!freq_table) { if (!freq_table) {
result = -ENOMEM; result = -ENOMEM;
goto err_unreg; goto err_unreg;
@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
} }
freq_table[valid_states].frequency = CPUFREQ_TABLE_END; freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
max_boost_ratio = get_max_boost_ratio(cpu);
if (max_boost_ratio) { if (max_boost_ratio) {
unsigned int state = data->first_perf_state; unsigned int freq = freq_table[0].frequency;
unsigned int freq = freq_table[state].frequency;
/* /*
* Because the loop above sorts the freq_table entries in the * Because the loop above sorts the freq_table entries in the
* descending order, freq is the maximum frequency in the table. * descending order, freq is the maximum frequency in the table.
* Assume that it corresponds to the CPPC nominal frequency and * Assume that it corresponds to the CPPC nominal frequency and
* use it to populate the frequency field of the extra "boost" * use it to set cpuinfo.max_freq.
* frequency entry.
*/ */
freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
} else {
/* /*
* The purpose of the extra "boost" frequency entry is to make * If the maximum "boost" frequency is unknown, ask the arch
* the rest of cpufreq aware of the real maximum frequency, but * scale-invariance code to use the "nominal" performance for
* the way to request it is the same as for the first_perf_state * CPU utilization scaling so as to prevent the schedutil
* entry that is expected to cover the entire range of "boost" * governor from selecting inadequate CPU frequencies.
* frequencies of the CPU, so copy the driver_data value from
* that entry.
*/ */
freq_table[0].driver_data = freq_table[state].driver_data; arch_set_max_freq_ratio(true);
} }
policy->freq_table = freq_table; policy->freq_table = freq_table;
@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
{ {
struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
policy->cpu); policy->cpu);
struct acpi_cpufreq_data *data = policy->driver_data; unsigned int freq = policy->freq_table[0].frequency;
unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
if (perf->states[0].core_frequency * 1000 != freq) if (perf->states[0].core_frequency * 1000 != freq)
pr_warn(FW_WARN "P-state 0 is not max freq\n"); pr_warn(FW_WARN "P-state 0 is not max freq\n");

View file

@ -2101,7 +2101,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
* cpufreq_driver_adjust_perf - Adjust CPU performance level in one go. * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
* @cpu: Target CPU. * @cpu: Target CPU.
* @min_perf: Minimum (required) performance level (units of @capacity). * @min_perf: Minimum (required) performance level (units of @capacity).
* @target_perf: Terget (desired) performance level (units of @capacity). * @target_perf: Target (desired) performance level (units of @capacity).
* @capacity: Capacity of the target CPU. * @capacity: Capacity of the target CPU.
* *
* Carry out a fast performance level switch of @cpu without sleeping. * Carry out a fast performance level switch of @cpu without sleeping.

View file

@ -52,7 +52,13 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
} }
policy->min = policy->cpuinfo.min_freq = min_freq; policy->min = policy->cpuinfo.min_freq = min_freq;
policy->max = policy->cpuinfo.max_freq = max_freq; policy->max = max_freq;
/*
* If the driver has set its own cpuinfo.max_freq above max_freq, leave
* it as is.
*/
if (policy->cpuinfo.max_freq < max_freq)
policy->max = policy->cpuinfo.max_freq = max_freq;
if (policy->min == ~0) if (policy->min == ~0)
return -EINVAL; return -EINVAL;

View file

@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
struct qcom_cpufreq_data { struct qcom_cpufreq_data {
void __iomem *base; void __iomem *base;
struct resource *res;
const struct qcom_cpufreq_soc_data *soc_data; const struct qcom_cpufreq_soc_data *soc_data;
}; };
@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
struct of_phandle_args args; struct of_phandle_args args;
struct device_node *cpu_np; struct device_node *cpu_np;
struct device *cpu_dev; struct device *cpu_dev;
struct resource *res;
void __iomem *base; void __iomem *base;
struct qcom_cpufreq_data *data; struct qcom_cpufreq_data *data;
int ret, index; int ret, index;
@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
index = args.args[0]; index = args.args[0];
base = devm_platform_ioremap_resource(pdev, index); res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (IS_ERR(base)) if (!res) {
return PTR_ERR(base); dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!request_mem_region(res->start, resource_size(res), res->name)) {
dev_err(dev, "failed to request resource %pR\n", res);
return -EBUSY;
}
base = ioremap(res->start, resource_size(res));
if (IS_ERR(base)) {
dev_err(dev, "failed to map resource %pR\n", res);
ret = PTR_ERR(base);
goto release_region;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) { if (!data) {
ret = -ENOMEM; ret = -ENOMEM;
goto error; goto unmap_base;
} }
data->soc_data = of_device_get_match_data(&pdev->dev); data->soc_data = of_device_get_match_data(&pdev->dev);
data->base = base; data->base = base;
data->res = res;
/* HW should be in enabled state to proceed */ /* HW should be in enabled state to proceed */
if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
@ -355,7 +372,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
return 0; return 0;
error: error:
devm_iounmap(dev, base); kfree(data);
unmap_base:
iounmap(data->base);
release_region:
release_mem_region(res->start, resource_size(res));
return ret; return ret;
} }
@ -363,12 +384,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
{ {
struct device *cpu_dev = get_cpu_device(policy->cpu); struct device *cpu_dev = get_cpu_device(policy->cpu);
struct qcom_cpufreq_data *data = policy->driver_data; struct qcom_cpufreq_data *data = policy->driver_data;
struct platform_device *pdev = cpufreq_get_driver_data(); struct resource *res = data->res;
void __iomem *base = data->base;
dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_remove_all_dynamic(cpu_dev);
dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
kfree(policy->freq_table); kfree(policy->freq_table);
devm_iounmap(&pdev->dev, data->base); kfree(data);
iounmap(base);
release_mem_region(res->start, resource_size(res));
return 0; return 0;
} }

View file

@ -998,14 +998,15 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
old_opp = opp_table->current_opp; old_opp = opp_table->current_opp;
/* Return early if nothing to do */ /* Return early if nothing to do */
if (opp_table->enabled && old_opp == opp) { if (old_opp == opp && opp_table->current_rate == freq &&
opp_table->enabled) {
dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__); dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__);
return 0; return 0;
} }
dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n", dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n",
__func__, old_opp->rate, freq, old_opp->level, opp->level, __func__, opp_table->current_rate, freq, old_opp->level,
old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0, opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
opp->bandwidth ? opp->bandwidth[0].peak : 0); opp->bandwidth ? opp->bandwidth[0].peak : 0);
scaling_down = _opp_compare_key(old_opp, opp); scaling_down = _opp_compare_key(old_opp, opp);
@ -1061,6 +1062,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
/* Make sure current_opp doesn't get freed */ /* Make sure current_opp doesn't get freed */
dev_pm_opp_get(opp); dev_pm_opp_get(opp);
opp_table->current_opp = opp; opp_table->current_opp = opp;
opp_table->current_rate = freq;
return ret; return ret;
} }

View file

@ -135,6 +135,7 @@ enum opp_table_access {
* @clock_latency_ns_max: Max clock latency in nanoseconds. * @clock_latency_ns_max: Max clock latency in nanoseconds.
* @parsed_static_opps: Count of devices for which OPPs are initialized from DT. * @parsed_static_opps: Count of devices for which OPPs are initialized from DT.
* @shared_opp: OPP is shared between multiple devices. * @shared_opp: OPP is shared between multiple devices.
* @current_rate: Currently configured frequency.
* @current_opp: Currently configured OPP for the table. * @current_opp: Currently configured OPP for the table.
* @suspend_opp: Pointer to OPP to be used during device suspend. * @suspend_opp: Pointer to OPP to be used during device suspend.
* @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers. * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
@ -184,6 +185,7 @@ struct opp_table {
unsigned int parsed_static_opps; unsigned int parsed_static_opps;
enum opp_table_access shared_opp; enum opp_table_access shared_opp;
unsigned long current_rate;
struct dev_pm_opp *current_opp; struct dev_pm_opp *current_opp;
struct dev_pm_opp *suspend_opp; struct dev_pm_opp *suspend_opp;

View file

@ -26,7 +26,7 @@ struct sugov_policy {
struct sugov_tunables *tunables; struct sugov_tunables *tunables;
struct list_head tunables_hook; struct list_head tunables_hook;
raw_spinlock_t update_lock; /* For shared policies */ raw_spinlock_t update_lock;
u64 last_freq_update_time; u64 last_freq_update_time;
s64 freq_update_delay_ns; s64 freq_update_delay_ns;
unsigned int next_freq; unsigned int next_freq;
@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
* Make sugov_should_update_freq() ignore the rate limit when DL * Make sugov_should_update_freq() ignore the rate limit when DL
* has increased the utilization. * has increased the utilization.
*/ */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{ {
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
sg_policy->limits_changed = true; sg_cpu->sg_policy->limits_changed = true;
} }
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
u64 time, unsigned int flags) u64 time, unsigned int flags)
{ {
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
sugov_iowait_boost(sg_cpu, time, flags); sugov_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time; sg_cpu->last_update = time;
ignore_dl_rate_limit(sg_cpu, sg_policy); ignore_dl_rate_limit(sg_cpu);
if (!sugov_should_update_freq(sg_policy, time)) if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
return false; return false;
sugov_get_util(sg_cpu); sugov_get_util(sg_cpu);
@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
sugov_iowait_boost(sg_cpu, time, flags); sugov_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time; sg_cpu->last_update = time;
ignore_dl_rate_limit(sg_cpu, sg_policy); ignore_dl_rate_limit(sg_cpu);
if (sugov_should_update_freq(sg_policy, time)) { if (sugov_should_update_freq(sg_policy, time)) {
next_f = sugov_next_freq_shared(sg_cpu, time); next_f = sugov_next_freq_shared(sg_cpu, time);