More power management updates for 5.12-rc1

- Address cpufreq regression introduced in 5.11 that causes
    CPU frequency reporting to be distorted on systems with CPPC
    that use acpi-cpufreq as the scaling driver (Rafael Wysocki).
 
  - Fix regression introduced during the 5.10 development cycle
    related to CPU hotplug and policy recreation in the
    qcom-cpufreq-hw driver (Shawn Guo).
 
  - Fix recent regression in the operating performance points (OPP)
    framework that may cause frequency updates to be skipped by
    mistake in some cases (Jonathan Marek).
 
  - Simplify schedutil governor code and remove a misleading comment
    from it (Yue Hu).
 
  - Fix kerneldoc comment typo in the cpufreq core (Yue Hu).
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmA1UtMSHHJqd0Byand5
 c29ja2kubmV0AAoJEILEb/54YlRxezIP/2oBj9fFBSLEB6NL24hO1O7Te2Jbdmpq
 RZbGu712eVeB+2dp7jApofwIaBuqRIB9gZBPwyIpEl9c4PbvQ8xARBfxUTBneWuG
 0+y8t9YDHnTxTz2erh6/OkbCEfqijnpWqHtt9A5OiFvPT2zyjCRZ2W/+UJ66QF+O
 Dl79CyiDotwbMlZnYGTJSxRTia4OFT3U9qc5H0KBCDIWKCE47XpwnLDAuPu9ClY+
 YW3Tp58yc/3eRcYIexjovmHN/TAF6yFMhVX2q/EGdmAraMM5+bQvymbjtA5LvQlj
 q68wSRa92KBxf+VVQf3Bv9gyFCgfZLz3lYSRCf/xKs4xcsA3j1PdV8QGO15rFtuG
 paJ+T74YAzOm4ntihU+QusCJwYpXMn87BKpCEdsVkV3bJLNWlC/9wDwlXgNvOi+0
 /pzNGSCfJjyG6vXb5G2WC+iDLX1BKdLS3+adCzfMHgU2dS3kUjCUDDA400xYmW/B
 yNpjU6hUOqNLA2LWRgteuKP/psjJEQH6mwWWXuXsjFf+wGCHIc0U2t73LYR+JCgZ
 K43VsxIu2J7QWjSV9Nzff1yVNpJBlMnXr0jVQuvHh9Rkc4qvk2yU0SHEeuCXexFL
 rcapniJ3/1DbBK93+1ObENjbtq4XF/1FQhNRhcQew7Do54NmjuGRc1lEu+q3hbcs
 5Gldg/M97C62
 =PT0e
 -----END PGP SIGNATURE-----

Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
 "These are fixes and cleanups on top of the power management material
  for 5.12-rc1 merged previously.

  Specifics:

   - Address cpufreq regression introduced in 5.11 that causes CPU
     frequency reporting to be distorted on systems with CPPC that use
     acpi-cpufreq as the scaling driver (Rafael Wysocki).

   - Fix regression introduced during the 5.10 development cycle related
     to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver
     (Shawn Guo).

   - Fix recent regression in the operating performance points (OPP)
     framework that may cause frequency updates to be skipped by mistake
     in some cases (Jonathan Marek).

   - Simplify schedutil governor code and remove a misleading comment
     from it (Yue Hu).

   - Fix kerneldoc comment typo in the cpufreq core (Yue Hu)"

* tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Fix typo in kerneldoc comment
  cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition
  cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit()
  cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known
  cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
  opp: Don't skip freq update for different frequency
This commit is contained in:
Linus Torvalds 2021-02-23 14:59:46 -08:00
commit 005d3bd9e3
7 changed files with 69 additions and 67 deletions

View File

@ -54,7 +54,6 @@ struct acpi_cpufreq_data {
unsigned int resume;
unsigned int cpu_feature;
unsigned int acpi_perf_cpu;
unsigned int first_perf_state;
cpumask_var_t freqdomain_cpus;
void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
perf = to_perf_data(data);
cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
cpufreq_for_each_entry(pos, policy->freq_table)
if (msr == perf->states[pos->driver_data].status)
return pos->frequency;
return policy->freq_table[data->first_perf_state].frequency;
return policy->freq_table[0].frequency;
}
static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
struct cpufreq_policy *policy;
unsigned int freq;
unsigned int cached_freq;
unsigned int state;
pr_debug("%s (%d)\n", __func__, cpu);
@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
if (unlikely(!data || !policy->freq_table))
return 0;
state = to_perf_data(data)->state;
if (state < data->first_perf_state)
state = data->first_perf_state;
cached_freq = policy->freq_table[state].frequency;
cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
if (freq != cached_freq) {
/*
@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
struct cpuinfo_x86 *c = &cpu_data(cpu);
unsigned int valid_states = 0;
unsigned int result = 0;
unsigned int state_count;
u64 max_boost_ratio;
unsigned int i;
#ifdef CONFIG_SMP
@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto err_unreg;
}
state_count = perf->state_count + 1;
max_boost_ratio = get_max_boost_ratio(cpu);
if (max_boost_ratio) {
/*
* Make a room for one more entry to represent the highest
* available "boost" frequency.
*/
state_count++;
valid_states++;
data->first_perf_state = valid_states;
} else {
/*
* If the maximum "boost" frequency is unknown, ask the arch
* scale-invariance code to use the "nominal" performance for
* CPU utilization scaling so as to prevent the schedutil
* governor from selecting inadequate CPU frequencies.
*/
arch_set_max_freq_ratio(true);
}
freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
GFP_KERNEL);
if (!freq_table) {
result = -ENOMEM;
goto err_unreg;
@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
max_boost_ratio = get_max_boost_ratio(cpu);
if (max_boost_ratio) {
unsigned int state = data->first_perf_state;
unsigned int freq = freq_table[state].frequency;
unsigned int freq = freq_table[0].frequency;
/*
* Because the loop above sorts the freq_table entries in the
* descending order, freq is the maximum frequency in the table.
* Assume that it corresponds to the CPPC nominal frequency and
* use it to populate the frequency field of the extra "boost"
* frequency entry.
* use it to set cpuinfo.max_freq.
*/
freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
} else {
/*
* The purpose of the extra "boost" frequency entry is to make
* the rest of cpufreq aware of the real maximum frequency, but
* the way to request it is the same as for the first_perf_state
* entry that is expected to cover the entire range of "boost"
* frequencies of the CPU, so copy the driver_data value from
* that entry.
* If the maximum "boost" frequency is unknown, ask the arch
* scale-invariance code to use the "nominal" performance for
* CPU utilization scaling so as to prevent the schedutil
* governor from selecting inadequate CPU frequencies.
*/
freq_table[0].driver_data = freq_table[state].driver_data;
arch_set_max_freq_ratio(true);
}
policy->freq_table = freq_table;
@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
{
struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
policy->cpu);
struct acpi_cpufreq_data *data = policy->driver_data;
unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
unsigned int freq = policy->freq_table[0].frequency;
if (perf->states[0].core_frequency * 1000 != freq)
pr_warn(FW_WARN "P-state 0 is not max freq\n");

View File

@ -2101,7 +2101,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
* cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
* @cpu: Target CPU.
* @min_perf: Minimum (required) performance level (units of @capacity).
* @target_perf: Terget (desired) performance level (units of @capacity).
* @target_perf: Target (desired) performance level (units of @capacity).
* @capacity: Capacity of the target CPU.
*
* Carry out a fast performance level switch of @cpu without sleeping.

View File

@ -52,7 +52,13 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
}
policy->min = policy->cpuinfo.min_freq = min_freq;
policy->max = policy->cpuinfo.max_freq = max_freq;
policy->max = max_freq;
/*
* If the driver has set its own cpuinfo.max_freq above max_freq, leave
* it as is.
*/
if (policy->cpuinfo.max_freq < max_freq)
policy->max = policy->cpuinfo.max_freq = max_freq;
if (policy->min == ~0)
return -EINVAL;

View File

@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
struct qcom_cpufreq_data {
void __iomem *base;
struct resource *res;
const struct qcom_cpufreq_soc_data *soc_data;
};
@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
struct of_phandle_args args;
struct device_node *cpu_np;
struct device *cpu_dev;
struct resource *res;
void __iomem *base;
struct qcom_cpufreq_data *data;
int ret, index;
@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
index = args.args[0];
base = devm_platform_ioremap_resource(pdev, index);
if (IS_ERR(base))
return PTR_ERR(base);
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!request_mem_region(res->start, resource_size(res), res->name)) {
dev_err(dev, "failed to request resource %pR\n", res);
return -EBUSY;
}
base = ioremap(res->start, resource_size(res));
if (IS_ERR(base)) {
dev_err(dev, "failed to map resource %pR\n", res);
ret = PTR_ERR(base);
goto release_region;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) {
ret = -ENOMEM;
goto error;
goto unmap_base;
}
data->soc_data = of_device_get_match_data(&pdev->dev);
data->base = base;
data->res = res;
/* HW should be in enabled state to proceed */
if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
@ -355,7 +372,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
return 0;
error:
devm_iounmap(dev, base);
kfree(data);
unmap_base:
iounmap(data->base);
release_region:
release_mem_region(res->start, resource_size(res));
return ret;
}
@ -363,12 +384,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
{
struct device *cpu_dev = get_cpu_device(policy->cpu);
struct qcom_cpufreq_data *data = policy->driver_data;
struct platform_device *pdev = cpufreq_get_driver_data();
struct resource *res = data->res;
void __iomem *base = data->base;
dev_pm_opp_remove_all_dynamic(cpu_dev);
dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
kfree(policy->freq_table);
devm_iounmap(&pdev->dev, data->base);
kfree(data);
iounmap(base);
release_mem_region(res->start, resource_size(res));
return 0;
}

View File

@ -998,14 +998,15 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
old_opp = opp_table->current_opp;
/* Return early if nothing to do */
if (opp_table->enabled && old_opp == opp) {
if (old_opp == opp && opp_table->current_rate == freq &&
opp_table->enabled) {
dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__);
return 0;
}
dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n",
__func__, old_opp->rate, freq, old_opp->level, opp->level,
old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
__func__, opp_table->current_rate, freq, old_opp->level,
opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
opp->bandwidth ? opp->bandwidth[0].peak : 0);
scaling_down = _opp_compare_key(old_opp, opp);
@ -1061,6 +1062,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
/* Make sure current_opp doesn't get freed */
dev_pm_opp_get(opp);
opp_table->current_opp = opp;
opp_table->current_rate = freq;
return ret;
}

View File

@ -135,6 +135,7 @@ enum opp_table_access {
* @clock_latency_ns_max: Max clock latency in nanoseconds.
* @parsed_static_opps: Count of devices for which OPPs are initialized from DT.
* @shared_opp: OPP is shared between multiple devices.
* @current_rate: Currently configured frequency.
* @current_opp: Currently configured OPP for the table.
* @suspend_opp: Pointer to OPP to be used during device suspend.
* @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
@ -184,6 +185,7 @@ struct opp_table {
unsigned int parsed_static_opps;
enum opp_table_access shared_opp;
unsigned long current_rate;
struct dev_pm_opp *current_opp;
struct dev_pm_opp *suspend_opp;

View File

@ -26,7 +26,7 @@ struct sugov_policy {
struct sugov_tunables *tunables;
struct list_head tunables_hook;
raw_spinlock_t update_lock; /* For shared policies */
raw_spinlock_t update_lock;
u64 last_freq_update_time;
s64 freq_update_delay_ns;
unsigned int next_freq;
@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
* Make sugov_should_update_freq() ignore the rate limit when DL
* has increased the utilization.
*/
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
sg_policy->limits_changed = true;
sg_cpu->sg_policy->limits_changed = true;
}
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
u64 time, unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
sugov_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
ignore_dl_rate_limit(sg_cpu, sg_policy);
ignore_dl_rate_limit(sg_cpu);
if (!sugov_should_update_freq(sg_policy, time))
if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
return false;
sugov_get_util(sg_cpu);
@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
sugov_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
ignore_dl_rate_limit(sg_cpu, sg_policy);
ignore_dl_rate_limit(sg_cpu);
if (sugov_should_update_freq(sg_policy, time)) {
next_f = sugov_next_freq_shared(sg_cpu, time);