Merge Energy Model material for 5.19 to satisfy dependencies.

This commit is contained in:
Rafael J. Wysocki 2022-05-06 21:00:44 +02:00
commit 46acb9d9b6
9 changed files with 101 additions and 47 deletions

View file

@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
(static + dynamic). These power values might be coming directly from
experiments and measurements.
Registration of 'artificial' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There is an option to provide a custom callback for drivers missing detailed
knowledge about the power value of each performance state. The callback
.get_cost() is optional and provides the 'cost' values used by the EAS.
This is useful for platforms that only provide information on relative
efficiency between CPU types, where one could use the information to
create an abstract power model. But even an abstract power model can
sometimes be hard to fit in, given the input power value size restrictions.
The .get_cost() callback allows providing 'cost' values which reflect the
efficiency of the CPUs. This makes it possible to give EAS information which
has a different relation than what would be forced by the EM internal
formulas calculating 'cost' values. To register an EM for such a platform, the
driver must set the flag 'milliwatts' to 0, provide the .active_power()
callback and provide the .get_cost() callback. The EM framework handles such a
platform properly during registration. The flag EM_PERF_DOMAIN_ARTIFICIAL is
set for such a platform. Special care should be taken by other frameworks
which are using the EM to test and treat this flag properly.
Registration of 'simple' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -181,8 +201,8 @@ EM framework::
-> drivers/cpufreq/foo_cpufreq.c
01 static int est_power(unsigned long *mW, unsigned long *KHz,
02 struct device *dev)
01 static int est_power(struct device *dev, unsigned long *mW,
02 unsigned long *KHz)
03 {
04 long freq, power;
05

View file

@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
mtk_cpufreq_get_cpu_power(unsigned long *mW,
unsigned long *KHz, struct device *cpu_dev)
mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;

View file

@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}
static int __maybe_unused
scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
struct device *cpu_dev)
scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;

View file

@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
struct device *dev)
static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;

View file

@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;
pd = em_cpu_get(cpu);
if (!pd)
if (!pd || em_is_artificial(pd))
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);

View file

@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;
if (!em)
if (!em || em_is_artificial(em))
return false;
policy = cpufreq_cdev->policy;

View file

@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
struct em_perf_domain *em;
char *name;
int err, num_opps;
@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
dfc->devfreq = df;
dfc->em_pd = em_pd_get(dev);
if (dfc->em_pd) {
em = em_pd_get(dev);
if (em && !em_is_artificial(em)) {
dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
dev_dbg(dev, "missing EM for cooling device\n");
dev_dbg(dev, "missing proper EM for cooling device\n");
num_opps = dev_pm_opp_get_opp_count(dev);

View file

@ -67,11 +67,16 @@ struct em_perf_domain {
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
*
* EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
* created by a platform missing real power information.
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)
#define em_span_cpus(em) (to_cpumask((em)->cpus))
#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
@ -96,11 +101,11 @@ struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
* a device
* @dev : Device for which we do this operation (can be a CPU)
* @power : Active power at the performance state
* (modified)
* @freq : Frequency at the performance state in kHz
* (modified)
* @dev : Device for which we do this operation (can be a CPU)
*
* active_power() must find the lowest performance state of 'dev' above
* 'freq' and update 'power' and 'freq' to the matching active power
@ -112,11 +117,32 @@ struct em_data_callback {
*
* Return 0 on success.
*/
int (*active_power)(unsigned long *power, unsigned long *freq,
struct device *dev);
int (*active_power)(struct device *dev, unsigned long *power,
unsigned long *freq);
/**
* get_cost() - Provide the cost at the given performance state of
* a device
* @dev : Device for which we do this operation (can be a CPU)
* @freq : Frequency at the performance state in kHz
* @cost : The cost value for the performance state
* (modified)
*
* In case of CPUs, the cost is the one of a single CPU in the domain.
* It is expected to fit in the [0, EM_MAX_POWER] range due to internal
* usage in EAS calculation.
*
* Return 0 on success, or appropriate error value in case of failure.
*/
int (*get_cost)(struct device *dev, unsigned long freq,
unsigned long *cost);
};
#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \
{ .active_power = _active_power_cb, \
.get_cost = _cost_cb }
#define EM_DATA_CB(_active_power_cb) \
EM_ADV_DATA_CB(_active_power_cb, NULL)
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
#else
struct em_data_callback {};
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)

View file

@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
static int em_debug_units_show(struct seq_file *s, void *unused)
static int em_debug_flags_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
"milliWatts" : "bogoWatts";
seq_printf(s, "%s\n", units);
seq_printf(s, "%#lx\n", pd->flags);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_units);
static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
seq_printf(s, "%d\n", enabled);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
static void em_debug_create_pd(struct device *dev)
{
@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
&em_debug_cpus_fops);
debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
&em_debug_skip_inefficiencies_fops);
debugfs_create_file("flags", 0444, d, dev->em_pd,
&em_debug_flags_fops);
/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
#endif
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
int nr_states, struct em_data_callback *cb)
int nr_states, struct em_data_callback *cb,
unsigned long flags)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
ret = cb->active_power(&power, &freq, dev);
ret = cb->active_power(dev, &power, &freq);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res = em_scale_power(table[i].power);
unsigned long power_res, cost;
if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
ret = cb->get_cost(dev, table[i].frequency, &cost);
if (ret || !cost || cost > EM_MAX_POWER) {
dev_err(dev, "EM: invalid cost %lu %d\n",
cost, ret);
goto free_ps_table;
}
} else {
power_res = em_scale_power(table[i].power);
cost = div64_u64(fmax * power_res, table[i].frequency);
}
table[i].cost = cost;
table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
@ -197,7 +196,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
}
static int em_create_pd(struct device *dev, int nr_states,
struct em_data_callback *cb, cpumask_t *cpus)
struct em_data_callback *cb, cpumask_t *cpus,
unsigned long flags)
{
struct em_perf_domain *pd;
struct device *cpu_dev;
@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
return -ENOMEM;
}
ret = em_create_perf_table(dev, pd, nr_states, cb);
ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
if (ret) {
kfree(pd);
return ret;
@ -332,6 +332,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
unsigned long flags = 0;
int cpu, ret;
if (!dev || !nr_states || !cb)
@ -378,12 +379,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
}
}
ret = em_create_pd(dev, nr_states, cb, cpus);
if (milliwatts)
flags |= EM_PERF_DOMAIN_MILLIWATTS;
else if (cb->get_cost)
flags |= EM_PERF_DOMAIN_ARTIFICIAL;
ret = em_create_pd(dev, nr_states, cb, cpus, flags);
if (ret)
goto unlock;
if (milliwatts)
dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
dev->em_pd->flags |= flags;
em_cpufreq_update_efficiencies(dev);